From b072ab227b7ffdb726455d5dd024bca6c380757f Mon Sep 17 00:00:00 2001 From: "Dr. Kashif Rasul" Date: Thu, 17 Dec 2020 17:04:56 +0100 Subject: [PATCH] initial gluonts dependency --- examples/m5.ipynb | 198 +---- pts/__init__.py | 3 +- pts/core/__init__.py | 9 - pts/core/_base.py | 29 - pts/core/component.py | 171 ---- pts/core/serde.py | 374 -------- pts/dataset/__init__.py | 32 - pts/dataset/artificial.py | 834 ------------------ pts/dataset/common.py | 95 -- pts/dataset/file_dataset.py | 133 --- pts/dataset/list_dataset.py | 42 - pts/dataset/loader.py | 224 ----- pts/dataset/multivariate_grouper.py | 211 ----- pts/dataset/process.py | 116 --- pts/dataset/recipe.py | 604 ------------- pts/dataset/repository/__init__.py | 15 +- pts/dataset/repository/_artificial.py | 48 - pts/dataset/repository/_gp_copula_2019.py | 160 ---- pts/dataset/repository/_lstnet.py | 197 ----- pts/dataset/repository/_m4.py | 85 -- pts/dataset/repository/_m5.py | 63 +- pts/dataset/repository/_util.py | 78 -- pts/dataset/repository/datasets.py | 182 +--- pts/dataset/stat.py | 357 -------- pts/dataset/transformed_iterable_dataset.py | 47 - pts/dataset/utils.py | 148 ---- pts/distributions/implicit_quantile.py | 17 +- pts/distributions/zero_inflated.py | 5 +- pts/evaluation/__init__.py | 2 - pts/evaluation/backtest.py | 221 ----- pts/evaluation/evaluator.py | 730 --------------- pts/exception.py | 3 - pts/feature/__init__.py | 19 - pts/feature/holiday.py | 245 +---- pts/feature/lag.py | 139 --- pts/feature/time_feature.py | 206 ----- pts/feature/utils.py | 65 -- pts/model/__init__.py | 7 +- pts/model/deepar/deepar_estimator.py | 34 +- pts/model/deepar/deepar_network.py | 10 +- pts/model/deepvar/deepvar_estimator.py | 12 +- pts/model/deepvar/deepvar_network.py | 24 +- pts/model/estimator.py | 162 ++-- pts/model/forecast.py | 552 ------------ pts/model/forecast_generator.py | 195 ---- pts/model/lstnet/lstnet_estimator.py | 8 +- pts/model/lstnet/lstnet_network.py | 8 +- 
pts/model/n_beats/n_beats_ensemble.py | 1 + pts/model/n_beats/n_beats_estimator.py | 31 +- pts/model/n_beats/n_beats_network.py | 14 +- pts/model/predictor.py | 190 ---- pts/model/quantile.py | 98 -- pts/model/simple_feedforward/__init__.py | 1 - .../simple_feedforward_estimator.py | 28 +- .../simple_feedforward_network.py | 11 +- pts/model/tempflow/tempflow_estimator.py | 19 +- pts/model/tempflow/tempflow_network.py | 18 +- pts/model/transformer/__init__.py | 2 +- .../transformer/transformer_estimator.py | 14 +- pts/model/transformer/transformer_network.py | 15 +- .../transformer_tempflow_estimator.py | 28 +- .../transformer_tempflow_network.py | 24 +- pts/model/utils.py | 49 +- pts/modules/__init__.py | 4 - pts/modules/distribution_output.py | 142 +-- pts/modules/feature.py | 6 +- pts/modules/iqn_modules.py | 2 +- pts/modules/lambda_layer.py | 10 - pts/modules/scaler.py | 2 +- pts/trainer.py | 41 +- pts/transform/__init__.py | 52 -- pts/transform/convert.py | 713 --------------- pts/transform/dataset.py | 47 - pts/transform/feature.py | 257 ------ pts/transform/field.py | 118 --- pts/transform/sampler.py | 176 ---- pts/transform/split.py | 529 ----------- pts/transform/transform.py | 3 +- setup.py | 4 +- test/dataset/test_common.py | 30 - test/dataset/test_multivariate_grouper.py | 129 --- test/dataset/test_process.py | 21 - test/dataset/test_stat.py | 340 ------- test/evaluation/test_evaluator.py | 649 -------------- test/feature/test_lag.py | 311 ------- test/modules/test_distribution_output.py | 2 +- .../test_implicit_quantile_distr_output.py | 11 +- test/test_transform.py | 808 ----------------- 88 files changed, 498 insertions(+), 11571 deletions(-) delete mode 100644 pts/core/__init__.py delete mode 100644 pts/core/_base.py delete mode 100644 pts/core/component.py delete mode 100644 pts/core/serde.py delete mode 100644 pts/dataset/__init__.py delete mode 100644 pts/dataset/artificial.py delete mode 100644 pts/dataset/common.py delete mode 100644 
pts/dataset/file_dataset.py delete mode 100644 pts/dataset/list_dataset.py delete mode 100644 pts/dataset/loader.py delete mode 100644 pts/dataset/multivariate_grouper.py delete mode 100644 pts/dataset/process.py delete mode 100644 pts/dataset/recipe.py delete mode 100644 pts/dataset/repository/_artificial.py delete mode 100644 pts/dataset/repository/_gp_copula_2019.py delete mode 100644 pts/dataset/repository/_lstnet.py delete mode 100644 pts/dataset/repository/_m4.py delete mode 100644 pts/dataset/repository/_util.py delete mode 100644 pts/dataset/stat.py delete mode 100644 pts/dataset/transformed_iterable_dataset.py delete mode 100644 pts/dataset/utils.py delete mode 100644 pts/evaluation/__init__.py delete mode 100644 pts/evaluation/backtest.py delete mode 100644 pts/evaluation/evaluator.py delete mode 100644 pts/exception.py delete mode 100644 pts/feature/lag.py delete mode 100644 pts/feature/time_feature.py delete mode 100644 pts/feature/utils.py delete mode 100644 pts/model/forecast.py delete mode 100644 pts/model/forecast_generator.py delete mode 100644 pts/model/predictor.py delete mode 100644 pts/model/quantile.py delete mode 100644 pts/modules/lambda_layer.py delete mode 100644 pts/transform/__init__.py delete mode 100644 pts/transform/convert.py delete mode 100644 pts/transform/dataset.py delete mode 100644 pts/transform/feature.py delete mode 100644 pts/transform/field.py delete mode 100644 pts/transform/sampler.py delete mode 100644 pts/transform/split.py delete mode 100644 test/dataset/test_common.py delete mode 100644 test/dataset/test_multivariate_grouper.py delete mode 100644 test/dataset/test_process.py delete mode 100644 test/dataset/test_stat.py delete mode 100644 test/evaluation/test_evaluator.py delete mode 100644 test/feature/test_lag.py delete mode 100644 test/test_transform.py diff --git a/examples/m5.ipynb b/examples/m5.ipynb index dca854e..a115f84 100644 --- a/examples/m5.ipynb +++ b/examples/m5.ipynb @@ -2,12 +2,13 @@ "cells": [ { 
"cell_type": "code", - "execution_count": 17, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", - "import json" + "import json\n", + "from functools import partial" ] }, { @@ -30,31 +31,53 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ - "from pts.dataset.repository import get_dataset\n", - "from pts.dataset.utils import to_pandas" + "from gluonts.dataset.repository.datasets import get_dataset\n", + "from gluonts.dataset.util import to_pandas\n", + "from gluonts.evaluation import Evaluator\n", + "from gluonts.evaluation.backtest import make_evaluation_predictions" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "dataset = get_dataset(\"m5\", regenerate=False)" + "from pts.model.deepar import DeepAREstimator\n", + "from pts.modules import ZeroInflatedNegativeBinomialOutput\n", + "from pts import Trainer" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "saving time-series into /Users/krasul/.mxnet/gluon-ts/datasets/pts_m5/train/data.json\n", + "saving time-series into /Users/krasul/.mxnet/gluon-ts/datasets/pts_m5/test/data.json\n" + ] + } + ], + "source": [ + "dataset = get_dataset(\"pts_m5\", regenerate=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXAAAAEICAYAAABGaK+TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAA89klEQVR4nO2dd5wV1d3/32cLLMvSmwjqoohRkCIgGoKCRESJJaZpyqOJJXn98kRTJGKSJ0V9lCcaY+8aTWINdgFFlBWUJk2kLyC9991ld9nde35/zNy7c+fOvTNz79wG3/frta+9M3PK90z5zJnvaUprjSAIgpB/FGTbAEEQBCE5RMAFQRDyFBFwQRCEPEUEXBAEIU8RARcEQchTRMAFQRDyFBFwQRCEPEUEXBAEIU8RARfiopTaoJSqVUpVW/6OV0q1VErdrZTaZB6vVEqNV0opW/xvKKXmK6VqlFJ7lVIvKKV6Wo5fq5RqsqT9pVLqH0qpPrZ0rlNKrVJKVSmldiqlpiil2rjY/iul1Hql1CGl1Dal1N+VUkU+yn6+Ukorpe607T9ZKfWuacsepdRfLcfKTdv2K6V2KKUedstTKfW+UupWy3YPM1+nfccppUYqpbZ4sP9e87pUmefuv7yWXcgfRMAFNy7VWpdZ/rYB/wFGA5cAbYAfATcCD4QjKaW+DbwI3A90BvoC9cAnSqkOlvTnaK3LgHbA14FaYKFSqp+ZzvnAXcDVWus2wOnAKx7sfhs4S2vdFugHDABu8lJgpVSxWZZ5tv0tgA+Aj4DjgJ7Avy1BHgV2Ad2BgcD5wP9zyW4mcJ5l+zxglcO+Sq31Di/2m9QAl2Kc12uAB5RSX/URX8gDRMAFXyilRgNjgG9prZdprRu11nOBHwI/V0r1NmvifwPu1Fq/qLWuNcXneqAa+JU9Xa11k9Z6ndb6/wEfA382Dw3FEPnFZrh9WuvntdZView00zoQNhsIAb09FvM3wDQMIbVyLbBNa32f1rpGa12ntV5qOd4LeNXcvwN4D+PFlYiZwHClVPhZHIHx0hti2zfTo+0AaK3/pLVepbUOaa3nAbOAc/2kIeQ+IuCCXy4E5mmtN1t3miKxBaNmfhpwIkZN3RomBLxmppGI1zFEC4xa8EVKqb8opYYrpVp6NVQp9X2l1CFgD0YN/AkPcU4CfgLc7nD4HGCDUmqq6T6pUEqdaTl+P3CVUqpUKdUDuBhDxBMxH2hp2gdGbfsDYK1tny8Bt6KUaoXxIlyebBpCbiICLrjxplLqgPn3JoY7ZHucsNvN450t2/HCJGIb0BFAaz0LuBI4C5gM7FVK3aeUKnQz3Kz9twX6AI8DO93iAA8C/6O1rnY41hO4ygxzvGnPW6ZrBQyR7QscwniZLQDedLGxHuMldZ5SqiPQTmu9HqPGHN53BsZXSbI8DnwOvJ9CGkIOIgIuuHGF1rq9+XcFRm22e5yw3c3jeyzb8cIkogewL7yhtZ6qtb4UQ9Qvx3BlXO+1AFrrSoza56OJwimlLgXaaK3j+dhrgU9Me44A9wKdgNNNd8d7GF8PrTFeUh2A//NgYtgPPgL41Nz3iWXfZq31Rg/pOJXpHow2gO9qmXr0qEMEXPDLdGCYUuoE606l1DDgBIwGvtUYNdDv2MIUAN8CPnTJ45sYNdAoTH/uh2Ye/XzaXQSc4hJmNIbveYdSagfwPeCXSqm3zONLgXgi2BHDbfSw1rpea70X+AdGQ68bMzGE+jyay/0pMJwU3CdKqb9guHHGaK0PJZOGkONoreVP/hz/gA3A1x32T8bw3fYFCjF8w5XAI5Yw38NwJXwfKMHotfEssAnoZIa5FqNGi5lOL+AhjIbOM839l2O4LTpgNEaeDewGfuBi+/VAV/P3GRg18Ptc4rQx7Qz/vQL8HehoHj8NOIzRW6YQozF2HdDCPL4emIDxsmgPvAG86OE8twaOYLh4+lv2f2Huu8GybySwxUOat5nX5Lhs30fyl74/qYELyfAtYAaGy6A
aoyvdM8AvwgG04Yb4EYbI7QVWAK2A4dqonYY5VylVjSH2FUBbYKjW+gvz+H7gBgwxOmTmdY/W+gUXG4cDXyilaoAp5t/vEkXQWldprXeE/zBcJjVa633m8dUYvW0eN+26HLhMG+4UMHz1YzFeMGuBBhx63DjkWwMsBFoAyyyHZgFdSa4GfhfGF8FaSz/7hOUX8g+ltbjFBEEQ8hGpgQuCIOQpIuBC3qKUWm4b5h/++0GCOCPixHHqNhiUnT+Ik2dK/bLjlUMpNcI9tnA0IC4UQRCEPMXz5D5B0L59e927t7fRzDU1NbRu3TrpvCS+xJf4x2b8VPMOIo2g4y9cuHCP1rpLTMBMdnnp06eP9sqMGTM8h5X4El/iS/yg8g4ijaDjAwu0dCMUBEE4ehABFwRByFNEwAVBEPKUjDZiOtHQ0MCWLVuoq6uL2t+uXTtWrlyZdLoSPzZ+SUkJPXv2pLi4OOl0BUHIHbIu4Fu2bKFNmzaUl5ejLCtyVVVV0aZNwlWzEiLxo+Nrrdm7dy9btmyhV69eSacrCELukHUXSl1dHZ06dYoSbyF4lFJ06tQp5ktHEIT8JesCDoh4Zwg5z4JwdJETAp5r1DU0UdcoI1QFQchtjnkBP3DgAI8+Gr1Qy5qdVWyvCbnGveSSSzhw4ECaLIvlq1+VRcUFQWhGBNxBwMM0NjYmjDtlyhTat2+fBquc7Zg9e3ba8xIEIX845gV8woQJrFu3joEDBzJ+/HgqKiq49sqLuenHV3PGGWcAcMUVVzB48GD69u3Lk08+GYlbXl7Onj172LBhA6effjo33HADffv2ZcyYMdTW1sbk9Z///Id+/foxYMAAzjvvPACampoYP348Q4cOpX///jzxhLFw+qxZsxgxYgSXXXZZxI6ysrJIWvfcc08kzp/+9CfAmD9h3LhxDBgwgGHDhvHKK/GWdhQE4Wgg690IrfzlneWs2GYs3dfU1ERhoevC43EJxz/j+Lb86dK+ccNNnDiRZcuWsWTJEgAqKipYuWwpr02fzSVfHQDAs88+S8eOHamtrWXo0KF861vfolOnTlHpVFZW8tJLL/HUU0/x3e9+l7feeosbbrghKsztt9/O+++/T48ePSKul2eeeYZ27drx2WefUV9fz/DhwxkzZgwAixYtYtmyZTHd/qZNm0ZlZSXz589Ha81ll13GzJkz2b17N8cffzyTJ0+mqqqKUMjdDSQIQv5yzNfAneg38Cx6nnhSZPvBBx9kwIABnHPOOWzevJnKysqYOL169WLgwIEADB48mE2bNsWEGT58ONdeey1PPfUUTU1NgCHG//znPxk4cCDDhg1j7969kfTPPvtsxz7b06ZNY9q0aQwaNIizzjqLVatWUVlZyZlnnskHH3zArbfeyuzZs2nXrl0Qp0MQhBwlp2rg1ppyNgfCtCotjfyuqKhg+vTpzJkzh9LSUkaOHOnYl7ply5aR34WFhY7+88cff5x58+YxefJkBg8ezMKFC9Fa89BDD3HRRRdFhZ0yZUrc6Si11tx222389Kc/jTm2aNEipkyZwh133MG8efP44x//6LncgiDkF8d8DbxNmzZUVVXFPX7w4EE6dOhAaWkpq1atYu7cuUnntW7dOoYNG8btt99Oly5d2Lx5MxdddBGPPfYYDQ0NAKxZs4aampqE6Vx00UU8++yzVFcbi8hs3bqVXbt2sW3bNkpLS/nhD3/ITTfdxKJFi5K2VRCE3CenauDZoFOnTgwfPpx+/fpx8cUXM27cuKjjY8eO5fHHH+f000/ntNNO45xzzkk6r/Hjx1NZWYnWmtGjRzNgwAD69+/Phg0bOOuss9Ba06VLF958882E6YwZM4aVK1dy7rnnAkbj5r///W/Wrl3L+PHjKSgooKCgIKrBVRCEo49jXsABXnzxxajth3sPjPxu2bIlU6dOdYy3YcMGADp37syyZcsi+2+55RbHWv3rr78es08pxV133cVdd90VtX/EiBFccsklUfvCNW6Am2+
+mZtvvjnq+CmnnBJxxaTqghIEIfc55l0ogiAI+YoIuCAIQp6SEwJuLPkmpBs5z4JwdJF1AS8pKWHv3r0iLmkmPB94SUlJtk0RBCEgst6I2bNnT7Zs2cLu3buj9tfV1aUkNqnE37nfGAa/sqpVVvJPV/zwijyCIBwdZF3Ai4uLHUcbVlRUMGjQoKTTTSX+xRMmA7Bh4jiXkOnJPxfiC4KQ+2TdhSIIgiAkhwi4IAhCniICLgiCkKeIgAuCIOQpIuCCIAh5igi4IAhCniICLgiCkKeIgAuCIOQpIuCCIAh5igi4IAhCniICLgiCkKeIgAuCIOQpngVcKVWolFqslHrX3O6llJqnlFqrlHpFKdUifWYKgiAIdvzUwG8GVlq2/w/4u9a6N7AfuC5IwwRBEITEeBJwpVRPYBzwtLmtgAuASWaQ54Er0mCfIAiCEAflZSUcpdQk4G6gDXALcC0w16x9o5Q6AZiqte7nEPdG4EaALl26DH711Vc9GVZdXU1ZWZm3UgQc/9r3agB4bmzrrOQv8SW+xE8+fqp5B5FG0PFHjRq1UGs9JCag1jrhH/AN4FHz90jgXaAzsNYS5gRgmVtaffr00V6ZMWOG57BBxz/p1nf1Sbe+m7X8Jb7El/jJx0817yDSCDo+sEA7aKqXFXmGA5cppS4BSoC2wANAe6VUkda6EegJbE3yZSMIgiAkgasPXGt9m9a6p9a6HLgK+Ehr/QNgBvBtM9g1wFtps1IQBEGIIZV+4LcCv1ZKrQU6Ac8EY5IgCILgBV+LGmutK4AK8/d64OzgTRIEQRC8ICMxBUEQ8hQRcEEQhDxFBFwQBCFPEQEXBEHIU0TABUEQ8hQRcEEQhDxFBFwQBCFPEQEXBEHIU0TABUEQ8hQRcEEQhDxFBFwQBCFPEQEXBEHIU0TABUEQ8hQRcEEQhDxFBFwQBCFPEQEXBEHIU0TABUEQ8hQRcEEQhDxFBFwQBCFPEQEXBEHIU0TABUEQ8hQRcEEQhDxFBFwQBCFPEQEXBEHIU0TABUEQ8hQRcBta62ybIAiC4AkRcBtNIRFwQRDyAxFwG01SAxcEIU8QARcEQchTRMBtSAVcEIR8QQRcEAQhTxEBFwRByFNEwAVBEPIUVwFXSpUopeYrpT5XSi1XSv3F3N9LKTVPKbVWKfWKUqpF+s0VBEEQwnipgdcDF2itBwADgbFKqXOA/wP+rrXuDewHrkublYIgCEIMrgKuDarNzWLzTwMXAJPM/c8DV6TDwEwjvVAEQcgXlJeh40qpQmAh0Bt4BLgHmGvWvlFKnQBM1Vr3c4h7I3AjQJcuXQa/+uqrngyrrq6mrKzMYzGCi1/fpPnpB4cBeG5s64znL/ElvsRPLX6qeQeRRtDxR40atVBrPSQmoNba8x/QHpgBfA1Ya9l/ArDMLX6fPn20V2bMmOE5bJDxa+ob9Em3vqtPuvXdrOQv8SW+xE8tfqp5B5FG0PGBBdpBU331QtFaHzAF/FygvVKqyDzUE9jq7x0jCIIgpIKXXihdlFLtzd+tgAuBlRhC/m0z2DXAW2myURAEQXCgyD0I3YHnTT94AfCq1vpdpdQK4GWl1J3AYuCZNNqZMaQRUxCEfMFVwLXWS4FBDvvXA2enwyhBEATBHRmJKQiCkKeIgNsQD4ogCPmCCLggCEKeIgJuQ0srpiAIeYIIuCAIQp4iAp6Azzbso66hKdtmCIIgOCICbsPqQPnO43P4w5vLsmaLIAhCIkTAXVi141C2TRAEQXBEBNyGvQ1TobJjiCAIggsi4IIgCHmKCLggCEKeIgJuR7qBC4KQJ4iAu6DEBS4IQo4iAi4IgpCniIDb0EeJD+Uvc2r59atLovb95LnP+K9n52fHIEEQAkcE/Cjly4MhXl8UvcrdR6t2MXPN7ixZJAhC0IiA24jtBy4IgpCbiIALgiDkKSLggiAIeYoIuI2jowlTEIRjARFwN6QjuCAIOYoIuA1ZkUcQhHxBBFzwzcKN+5m9dk+2zRC
EY56ibBsg5B/femw2ABsmjsuyJYJwbCM1cBt2B4p4wAVByFVEwAVBEPIUEXCBzfsOs/1gbbbNEATBJ+IDt3EsdkIZ8dcZgPi0BSHfkBq4IAhCniICbsM+nayM4xEEIVcRARcEQchTRMAFQRDyFBFwO0dBI6ZMByAIxwauAq6UOkEpNUMptUIptVwpdbO5v6NS6gOlVKX5v0P6zc08+egCF/0WhGMDLzXwRuA3WuszgHOAnyulzgAmAB9qrU8FPjS3hRwgJAouCMcErgKutd6utV5k/q4CVgI9gMuB581gzwNXpMnGjHI0SJ9bGd5YvCXh8WnLd/CT92vo/bspvDx/U3CGCYIQKMqPv1QpVQ7MBPoBm7TW7c39Ctgf3rbFuRG4EaBLly6DX331VU95VVdXU1ZW5tm2oOLvrwvxq4rmUYm92xfwh3NaZSz/IOI3hjTXTzsMwHNjW0f2X/teDQBtiuGh0bH7w2H/8MlhtlQb90WRgqcvag7rFD5o+yW+xE8lfqp5B5FG0PFHjRq1UGs9JCag1trTH1AGLASuNLcP2I7vd0ujT58+2iszZszwHDbI+NsP1OqTbn038nflo59mNP8g4tceaYzYbyW8b8Bf3nfcH2bMfR9H9p36+ykx6TulHaT9El/ipxI/1byDSCPo+MAC7aCpnnqhKKWKgdeAF7TWr5u7dyqlupvHuwO7knzZCBnG7aPLOngpHxtxBeFYwUsvFAU8A6zUWt9nOfQ2cI35+xrgreDNE+y8NH8Tk5duTxjGTaD9NHLKSFRByF28TGY1HPgR8IVSaom573fAROBVpdR1wEbgu2mxMMPYh9LnGre9/gUA4/rHn3jKTaClk4ogHB24CrjW+hPif0mPDtac3CMfK6Bu+uyrBp6XZ0AQjg1kJKaNo6F2ql0K4SbgyuI3EReKIOQuIuB5hJswhwlZgi3etD/hcTdEvwUhdxEBzyNmr9vrLaBFoL/56OyEx91QUgUXhJxFBNxGzKLGOaRfNfWNnsK5NcS6ulA8WyQIQjYRAc8jvNaG3Vwk/hoxBUHIVY5pAdda8/nmA57D7zpUl9XFf72KqXsjZvPvzfsOR34vMc9F1HtCFFwQksKvviTDMS3gr3y2mcsf+ZQPVuyM7Eskfmff9SHn3v1RJkxzxKs7x16CXYfq4oYNL2gMcMUjnwJw4HBDc56erRMEwcoL8zZx+SOfMmNV+gapH9MCvmZnNQAb99bEDZNL/aC9CrjdRVLXEPKVT1WdRcBzqRFAEPKI1TuqANhk+coNmmNawJ0a+3K5H7jnl4mtDEWF/kTYGl30WxCSI6wv6XyGjlkBbwppmkLhE5xbKnWk0V+N2Y79HdTQFIqU1W8CbvFCIU1DU2r2CsLRTDrV5ZgV8EsemMU/52x0DVfb0JQBa5p5a8lW+vxhKturHUTRawXcprnn31PBlY9+6tkGa/SqusRdF296eTGn/n6q57QF4VghE1/zx6yAr95Z5SlcXYYF/P3lOwDYXBUr4F7f5E7dBD/fctCzDV5HfAK86zIzoiAcq4SfonR+4R+zAm4l0enNtHclfLGdJNTrjZDqiz+HmwEEIW8IV4TEB55mrCfYXvls9OM7DsKWOHZYj7nhpwbtHD+l6IIg0PwcpbMnmwi4C6EMCvg/Pv0y4pJ4fGl91LFvPTab/3p2vqd0UhHg1TuqYvz+5RMms2lv+rpC2dl1qI4bP6hh+Tbvbp9xD87i6VnrPYd/dcFmRt1bkYR1guCNiIBLDTx7NGWwOvqXd1bEPbZwY+ysgvFIxeQX5zk37L79+dbkE/XJjNW7ONIEz326wXOc5dsOcefklZ7D/3bSUr7cU5Py14ogxCPSjTCNeYiAE32C7X3DQ3nYQy7XVxVyI5ODp0S/hXQhNfAcoDEPFTwdXp9s9JXPhLaKfgvpItILRXzg6eeVzzbx4cqdMfuzOUblvWU7mLRwi+94XtwCOT34xrzfJy3cEplgK12IC0VIF7pZwdOGCDhG7fL
W177guucXxHxS+5l6NWh+9u+F3PKfz33H82LxhyvTN8FOqljv9/AEW+lC5FtIF+IDzwEac7mmGgcvtcp4YXJhWoFM2iAVcCFtRHzg4kLJGPbnOcPdwAMhHaKUA7qeFvK9wVfIXcJf7wXiQonPjoN1TF8R67sOCl+TQOUIXixOplTTV+zklc82Rbbnrfe4RqcLb3++jYO1wc5BPm97I/PW72XBhn0Jw6XjZbd532GW7va2/F02Wb7tIIscFr0WgqF5KH368ihKX9KZ4ZuPfsr2g3VsmDgu6TSsJ9g+98nI07okna4fghwo48VvHy9IIvfL9f9cELX9vSfnRsVL5lNx7a5qbnppMV8/vStPXzMUSP2G37T3MI99Xs9jnxv2pXJvJMOoeytoDGlu+k5Gs/XNuAc/ATJ/fo4VZCSmB7YfjL/aTDLY9evkLq0DTT8efmY9dBM4L7XKoF0HydZkwy9M63VMVcDrG72fy3TUwDM9/YKQm2SiBp73Ah4mXd3BMtXI5UdQ3e4HTwIerwYeN8/EuQbZWyejA3nEBy6kiUx0UT2KBDz5uIlGYjpx4PARdlfVs7uq3jWsV4K81p5cKAHbUZdgEQqtNat2HEouYae8Gpr4ck+CZfB8VHm0hq0HajloWQf0WGbnoTr21RzJthlHBTKdrA9SqgFaTrA9GadUz7n7Q4b+73SG/u/05PO04cf8IG4Iv7UDtxfbTS8tjnvstUVbGXv/LMeBUsnw8xcWGX5ms4unvSx+To8Ghk/8iAv+VhGIbfnOsLs+5Kw7Psi2GUcHER94+jhqBDyoCqwXXfO7SLCnfDPsQvFrh1uaH6/ZHffYarP2vXZXtScb3AR4VuUeoNnXbLfNzwMTFv+9UusU8pCjRsCD8sHa08nFgR5uMySm0gslbp4uDXPJdrdM5vyGBT48HYA9CV8uFP/ZC4InZFFjHwTlA8/W0Hk/2bqFTaUE8dJOd394603uJsDhw41Nhk01R5Lvc52LL2jh6CB8bxWIDxxmrtlN+YTJcSf51xpeX7SF8gmT2XqglvEfH2bIndP5pHIP5RMms2yrt8UB7M+zF9fGz/61kPIJk6P27ToconzCZCanec3I8gmTGW+ZL6WuocnT/CG/fGWJ4/54pU1FwO3LxD09az3lEyZHDd5xo3zCZP789nKg+YFoaAoxY/Uu+v95WnR+foxLUKx/zd1I+YTJ7KkOrrF6xbZDlE+YnNDllAtc+15NzD0t+CNcGbTej+UTJvOzfy0MLI+8EfCPVhmTL83/0nlkXUhr3lhsLDqwdlc1u2s1e6rrmbHaiDdnnbdRg8nUyN4zFyK2svGQ8Xn/zufb/Cfok/9YZiw85EMUnYhbA0+hqmpfJu6FecZozj3V9Y4vyHgC/NzsDYBFwEOaJZsOxMb31YgZv1z/WbAZgK37a70n6MKCjcb9m87Rw0JuEG8+cCe9SBZXAVdKPauU2qWUWmbZ11Ep9YFSqtL83yEwi+IQfmjj1QStrg/r+SoqNLYSDa6IXhPTQzcUD4TnP/AqfEF9yqfaQyVe75SUlpYzTbKLpYr63bzlVoRmF0rIMayffuReihWkl0VcNscOzooULF5q4M8BY237JgAfaq1PBT40t9NKWBDj+ag1zm+8ooKw8HvrORLU8xW212t3vaB876m629LhAw8Lajhtt3NiF2B7eKsLxUms/fToyfR84JlYqVzIDXJiRR6t9UzA7re4HHje/P08cIWXzPbXNz8sCzfuY/jEj6iuj26AmlW5m/PvmcGRpugH6+lPvgTg5fmbufLRWB+vDjk/uIUFRhEbQ5pfvLSYiVNXccG9FVFhfv9G5OMixkXzxMz11B7xPjQ7TPiapbPtzyo+4QV9s6kLv3vjC8f9iW5gv6NGJy3cEvGdNzRpx7Sd0vz1q0u4493YNUeXbnFvG3ni43XuRjrgtduk4J9HZqylfMJkyidMzuHz7NzNFeCXL8cfN+GHZCez6qa1DrfO7QC6xQuolLoRuBG
grNuJVFRUADBxfi1bD4T417sfc3qnwkj4CbMOs6NGs2mvjoS1st4cgWc/NuuTT9i/35hPY+nnSyP7N2/cAMC6Lzfwzjp3//A976+O2ffy1Ap6tSt0CB1th/V3fV0doNi9Z69jOeysPxD/JWGPf1LbAioqKqJq7XdOXknvpk0cOtK8b/SJRXRpVcDLq537ODvZtW27c6Nr9W73RY1fnLeJMR2Mtobq6upI+ps2GvmvX7+eCrWF2lrDpzx//nwONxr2VldVRcKv2BH9Uq/4uNlO6wIX8z/7jA27o89bRUUFO2pCMfteX2TcNyPKoheyuOONhVHhIuWtrqaqyrjmU5ft8HQN7dw96VN+dEbLqH2VG4x7cOvWrVRU7Ikb13r+ksFP/EThkrUh3fbf837zSFz7eU417yDSqK6uZvceQ4+WLVtGyZ5VUcffXLKNK46LX3nwmn/KsxFqrbVSKm49Smv9JPAkQOeT+uiRI0cC8PiaObBvHwMGDuCrp3SOhC9dUAE1NbQuLSUcFoD3olvEI8fM/V8dPpwXNy6CvXsZMGAALJgHQO9TToa1q+nR80RYl1xNavDgwfTv2T7Ghogd5n6rvV/8ZzpQT/sOHRg5cphrHu027Ye5sx2PWfMAaNumDSNHfo0jjSF4f2pUuD3V9fCRMUL0uosGc6i2kZdXO7d629MFOO6442Br7DJuvU4+GVavitnvmCbGgx/+Pb9uFXy5jl69ejFy5Km0+mwGHD7MsGHDjEbXOZ/Spq1RJoDaL7bDkkWRNM877/yocoYZPHgIh1btgso1Ufmv210Nsz52LKf9vmld1gYOHow+Ztrftk2R4zFXzLR79OjByJH9og59+emXsGoFJ/TsyciRfeMmYT1/yeApvsN9a78fzj///KTaVdJuv8VO+3lONe8g0qioqKBTp9awexd9+/Zl5JndjQMWuxOl7zX/ZHuh7FRKdQcw/3tbn8vD53LIo4/Q7ruMasRMwQfunJf/OOGbPuh+5YWW2eGd0rbuUiiO+FxRKG43wlR6oUTaA2LzcErVfu3j5RzS3l0o8UjnzIFOduRbI6ZMrJg8YY1K5ylMVsDfBq4xf18DvBWMObaHW2vueX8VHzh0uXr4o7XR8XTzwzHh9WYXyt1TjVrjU7O+TNqmJq25e+pKX3Eija4e9TNe90indLcdqOWsOz7g07Wxn+Da9iKrTzBN7e3vxPqE4wlMKr1QIo2YMfudf9u3rDa1KCqI2u9cO/Ruq7Vct05a6ru9443FW6hY7Vx/Wbe7mgc/rERrTVNIc/eUlew2+5TPXb+Xl+dvcoyXS2RzTVivvLl4a6S7cC5hP3M19cEv8uGlG+FLwBzgNKXUFqXUdcBE4EKlVCXwdXPbF/EXFDANU7B6ZxWPzFjHDbaFBAD+9sEaw4UQiacjjZib9wXXbxdg0cb9PPHxel9xmhsxvT0A4ReNa7pKsbfmCPtqjnDd87HnxZqbAsb2Oy5uWs9+6v2llsrSoF6+wBOdJWvjdLHLFwj4q+VavyxeWbCZFdstsyZ6MPxXr3zOtf/4zPHY7HV7ue+DNRw43MC89Xt5YuZ6Hqsw3HirdlQx4XXnRt9cIg/0m0N1jfw4zjXIJuG6QfgcPjnTn4Z4wdUHrrW+Os6h0UEYYO8GZn0o3Wqv1gc7pNN3syWTboHNbRAUhS6iYj1/BQWKNiXFPHT1IH6RYLZAK/G64KXiQomknWQSieYudzodfj4WYr8sgr+JNMGcv2yQDzXwXKXZhWL8T8d0FBkdienF/OZliPytPJNrE/OHbQ/6wXVbIDXaB27g6yFMiwslnHT8blVR7hS7Dzzu15pOuR+4/fqIXkUj5yN10tkfPKeG0q/fXc3WA4b7w0thrW+06St2pk3CZ1bGn7fiwQ8rHfeHzbe/dd9btoNDdckPd3ebGMepMbexyfuZeX2xc3fBHYf8L123aschYw4a05D7p1fy+qItbNp3OMo+IMp1ETsY1lqm5khfOPThXrRpP28
ujp6+INF8NBtta5FuPVDr2Lbgxker4g+N33Gwjk8qE6f5SeUedliWldt4qIkV25rPybz1e9m8L7h1U/d7nD63cldVYHkGhRd/99yAz5cbq3dUxdyP4fu49kgTby7eytsO02qs2VnF0i0HYvaHtObNxVsjc97HI6cE/IK/NXf98vKystae/uet5VTVpWcl8FkJHr77PmjuwuY0qs+6b8OeGn7274X85tXPY8J5pc9xbRIej6qBB9gTZtLC2K6Fboy9fxbfeOiTqGv5a1vZw+enIeolE79WbE3rz++siHnRX/nobB63Dbz5+YuLorYTfU3c/PISfvD0vJi83PjJc7HtEWEuffgTnnDxf/7wmXl846FZke0/za7jkgebt7/35FxG/HWGD4sS85jHwUmXPew+MVomaWgKefJ3X/XkXEbaBuylk4vun8mlD38StS9c8bhj8gp++cqSmMoCwJi/z3Q8x59ubeSXryxxbafKmoC7SUo8/2ZUGNvLye1tlW6cdNKqFeFRp1t8To4UFrmbR59Kx9YtPNsQPn3WnhvxcEs3FeJdR7eG7Mh2VGK2tJOwJ9P+aK++zz3VmVtUYkfAi4FnCl8N1FnuAxm2NZmKZZU5IM/tnsh6DTzRw+02KZH9Qcx2g0tUA6x22mf8LvR51q0+NDfBcnI3FBW4Z+jmW08HIe3srY5pVgz4umb7wbaT6flYIH/nYvHSvpFtHQgSt8uUUQE/0qRT8v/asU/zme3r5vSmbQppNu09TH1jE0s2HwCie5Js3nc4qjukE+FieZllz7rYb1iUwzMyJiKohVcbm0LUNzaxpaq5TPHs1kRfs6VbDrDrUF3MdbTWFu3nePUOfz7ag7UNnh7wvbUh6hL0oXci0ULL8QiFdOS+yBT1jU1sOxBsV9tMkejSNTSFOFjbwJ7a5kDrd2dvnhS32yzeQt9bD9RS7/HWS3kovR+OhIzFb5/78dmRfYm6iLkR63PKLmMfmMm83309ypb9h49w3j0zaF9azAFz5fMCU1kPHm5gxF9ncNXQExKmO3WZ0Qg3feVOvkiwMMX2g7VRfZLDwnlKl9autrt1T/TKX99fzdb9tUz+wl0gjMFXzVct7At8+PuDosJd+PeZcdOI1+gajwF/mcbSP49xDfebj/0L3Kh7K5j+6/N8xXl85jr++l7s/Dvp5LeTlvLZhv2R7XW7qzmlS1lGbUgH/5qzkfunr+GQ5SV/wd8+5q2fD2fACe0zbo/b18LY+2c57h8+8aPmDZfHMuMulIrVRo8OV1eA9v+Zl+1Pp52HmlduCZsSFu3wf2gWy2pzKbCZttVZfn/J6VHb4d4I8d7YYfbXOH/d9O7ahopbRjLvd6N56+fDHcMUBuRDmb1uD9Ntq8/Hv45xXChpvowpzW3uwo6D/lbvWeywIEW6+WhldC+OPVXBrTiUbhI942t2VkWJd5jN+zPXG8VK+Dbz82zZ3WluX90ZrYFb8dKI6Zdc822CsxgV2C6oPUjbVsldFrurxNpGUN7ZqIXHa6wMyidqndIgkraPsJD+L6l03id+z6OXBuagsZe+ZbHzTJu5SKKXe7xjfhb4CBSLgHu95+zB3O6nrDdiJsLvac8lAQ9b4lRjCNfAIwNcYi5aYoGPR5HtxeB0PuK5SoJaeFVr719CoThh092ol85eKH5PY0u/LdoBYD+/LbPwEkmWbH9l+yH8fWl/LhPR4LMnXVauXPmEyZHJm65+ai5TvogdaLGvTif0fTqxPQe6Rv3oGaMPcfg+cxTwAufJncLYxTQ8f4ZbTcLajx6cu1Xaa/9h+nQLxgeqcZi0Ko7ZF90/k6uenBtIvn44+38/9B2nz+9jp7N14vtPzfOVrt2HXz5hMndPiT9x2qwEg8q84lcCc2nBhES2x/M5Z7KHlbXBPfzoH/YxQZq90hVelD0eOfHqfdOhIcraiyGfCA/6aa6Bx4YJC3iTOXjFfuMFdcPF+yI5rVvsYKC7rjzTc7otEtQatdYxLy2/PVx
ysZLld1reVEg06GfqstQXxLWfX7da7aKN+xMezyTJTe0cvB3xsI5crXfpXeaEfXrjRJ0WIEcEvDgLn5HpJ/6dFhHwOHdj3BvO540YL/2Rp3WJ2Vfawrvf/XeXfCXusXh+bT/kkiss1wji5W4XbD+TxmWbbPSZ94P1K7nWZzdU8H/v54RyFjv0U87TcQYREt1nYRdJeJEJe9i4/mif927cOVAckvdzvuO5YcD5YfdbA8pkbTffCKKtwn6F3NoEckkzk7Mlc2pivTx+xxGA/9HkWeuFYuXNJdtiPjdeWJW5YcXpINF9Nn3lTmav3cP3zTk3Yn3GAd1wcZJxEgE/WSbqdnaoNrYbl99+zrflwTzZmeLLPTVR8+EH0tjs4EJpCmmuenKOY/AJr39BeefWnHNyp9TzTpFEz9WrC5zn68mkC8WaVV2DdzEunzCZG8872bfo50QNHILx7eUSbhWFRyqaVxSy1ioevHpQIJ/JvdoWcP6psa4ScNb1eMLw6wv7xOx7y2FWtTDJzFqYr2TC1fPkzHVRjYjB6LfdhaKprm+MGtxj5w9vLks94wBIphdKRr/mLZn57Q785Mz1/HPORl9xckbAjzbc7rN4s+/179Eufm8TH3fi5b2L47o6nEQgXjvETaNPjdnXYPla6nt8W+9G5RhOLyc/+O3ylQz2QSBB9Gm2v3eaQtq1xpEr3feSa8TMoAvFcn3qfC7Plwwi4FkiUQt1EDXwRGk4iYCf0WJHLC+foPqPZwMvc8QkIp0LIoexT0QWxL0RuyA4NLq3ZOYEyTSoZvIOtT4OyTRi+iUnfOBHGyu3H3K9zT63TGAUPYd3+n12qaZvrXlmYxbDoCj2MEtjIjIxffFzszdEbSdqQHbjw5U7capsh33giVifxERd6SDXuxEu3nSATfsO0xV//b+TRWrgaeDiB5wnqYmH/RPPre+nFxKN/nL7pLz67MSTa91+ed/I73H9u3Nmj3b+jMsRUq2BZ6O3TOey5Odtv+75BdzwzwUxItgU0hn5mgiCXBfw/3tvFb+dtBTIzP0hAm6hvFNpYGn5eR7sQ5ndppf1QosE01vY7+dvDuoRtX3nFYkH9Qzo2T7yu32rFrzzi6/Rs0MrnxZmHy/zkAzvHb/nhZ+l6oLCy9zufglpnZWyJENyLpTMfyZqrTPS/VIEPAew9oNXqEBqQy0T1C7tNRJ7SLev9KBmLsw2XgaQJXr4syF66ej5EtLa3QeeI+TJh0LGmgxEwNPEc8u9T9G5wbJWnlLxpzv1I5stE9bAE6fk5mKxCni4RnSwNriFOjKF0wAyO4lORTbKnI6JuJpC3l4M2V6yEJIbibnX4wLOQbLpUGbOlQi4hetHnAxAaSL/g0eqkrxnOpW14OIzu6ecf6IirN6ZeF5xN6z+9SHlHYHk1v3LNh1K3f3Jc9btjXssGyu2p6MG3tgUsnVrdcZtYeZMkMz765b/fB68IS78eU5mxkMc1QK+YeI4X+F/eM5JbJg4jid+NBiAoeUd+Nt3BqTDtBiGnNSB4b07UdqiiGG9OqacXiIXinVxCSsbJo5zPGdnndg+aruwQEXCpnMll5duOCfusVbFhXw8fmTC+DPHj0p4vG2rYlcbErmzstGDMh1um9qGJk8vhlxYCDlHuqPnDEe1gCdLuG9zSGfuIQ1pHXFtBDHwoEWCKxsjSi7Z2e3JlA88kYtDo137oBcXubiCUjzPWfGBp0HBDh9pyhsfeC5NrJULiIA70DzZlLtIBEVTSLu+LPyYkkhk7bUtv630mRrZlqiRMaTduwG69djw8iJKFCYboxOb0iC0a3dVe3Kh5AJSA4/mmBPwrm1auoaJLLigNSd7WBA4CBqatKswnndqF0Z/pWvKeQ05qYOncL3MZdiG9+4MEHHteGn8C4JubUviHrvwjG6uNehE85aDNwFPVNYg+057vc/S0Y743OwNPGaZmyeXyZUh/blC3gv4+ItOi9r+x4+HMv93o3lkdHSf7vBgk9sv78v
sCRew8A9fjxybfNPXWPw/F0a2w891QYGiv6XPs19e/em5fPSb8xOGufrsEwGoa2yi2CIofz0vtl/1bZeczgNXD+L7w4w44/p3555v948Kc9aJ7Zn/+9EJ8/yVhzlAHhxVyru/+BoAN48+lVm/HcW/rx/G7AkXOM4dfumA413TtBIeDNTa0to66Wfn8v4vz2PObRfw95GtOK5dCbN+G+vH/sGwE7nvuwNiBPiTW0cx45aRke2iQhXlJ3/vlyOiwhcVKJb88UJevH6Yo42zJ1yQ8Osknt/4xI6lPPH1Uj60XfsLLC/fOyyDoQDe+e+vxdzLznmmx9Xx6droxtp7z2/laymwTJHr8h2uU4RdmL+5sA+zfjuKxf9zYVSbzANXDWT+70bz/i/PY82dF0el8fNRp3Dr0PiVFyt5P5S+n20UYPd2JXRtW0Lr4uibr4tZ8y4sKOD49tHi2Pf46DTCN0kq7hMFnO2hMfJks5Zb3xCKchl0LS2gXaviqK5qRQWKspZFkTjHtS2JLFYcprxTa7q2KWFFgjxLPCxi27alonVL4/YoLFCc0NF4IdrPXSR8ib9bafTp3fjjW8spLiqgTCmq6xvpXNYyUp4OJca5COdrpVvbEloWFVJbED1UuWeH6LBFhYqTOjWfnxNsxwsKFO1LW3BGnAm5urh8rSUS8JZFtTENvNZeLyNsM0W2blnkOsqyTcuitNTAIdY917lVbtbtcr0CrjD0o7AACEGH1i0i93AHy4Li5Z1a07WtoVUAPdq3YuuBWgBO7dqGIuXt/Oe9gPv9pPIiyeEHM5VGLq9Whee2qG9scvXp2tfZVEBJUbQYJ7OMUxAubb+1tVLzJdLU1DwPh9fG0fB5cJsXxD7XiT39sM3x0nF7gccT8GR96263ckGB4khTeubX8HIP5IL7ItdX5AnfEuFpUOLdCzGzTFo2jzSGUB6LmZuvWR/YL2irOLXLcDhPN6p5FdIwajmGsIjsqT4S02h3RnfnmmHnMqNmeFy7EtqXGl3hOpj/u7Z19/Hb6d7O2+daIuLVzOPRynSdnNK1jDN7Gl9ALYu9nfCwkLi9YO3CbH9owgIdT6iN2lTz/WVf3NevgFvPs1NZ3VzqB2sb+PfcTWkRMS+LD7wwbxNb9h92DZdOclu+mwm3Ccd7l9vvkd5dm7/WWhYX4LWZKSs18BM7lrJpn/cb4c4r+tG5rAUndmxNYYGiRVEBD0xfw3eHnsDheuNVd3avjtw44uSoT2Yw+gLvqannoQ8rPecXipx8b2exQHkf4nvZgON521wQ4fEfnsXu6uYRP/YGs7uuPJNR91ZEtsNi8s1BPWhRVMDF/bpTWKB49tohDCnvyOy1exh5mrdGzoeuHsSXe2ro3bWMC8/o5s34BFw/4mTunrrKU9hvDupBSXEh//zJ2fTr0Y6a+kYWbNxH1zbeXiThU926ZRHP/XgoRQUFHOfhJWQX/LAryelFUKiMF4BVpOes2xP1oNkbMYeWd+CzDfuj7pspN43gveU76NOtjIv6HsfDM4zGwq5tSvjrt/rz29eWxrG1WQR6tG/Fcz8eyoV/nwkYX1le3GBuzLntAs69+yNfcZZsPhDjqsok4XdX765l/PnSvmw/WMv4Sc7nMEyfbukbq+BGPA2xf7E+ePUg+v95GgAX9T2OuQcquefb/V3LlpKAK6XGAg8AhcDTWuuJXuKd2bOdLwE/5+SO9O4avZL6/VcNAmDacmMln7YlRXzdQYhO7FTKiZ1KIw+9F00O97X1+kn/6wv7cO+0Na7higsVo0/vytufb+PSAccztl93Xpq/KXK8yFYDP769sygppfhG/+ZGwwu+YpR7bD/vIzj9Njq6YT1XXdu0ZFeCZdfCvVnO62P4gTta/IResNZAvb6wILZGHh5x6/SlNbCrcczavS5mLUmbgIdfQFb7zji+bVwf+3eG9IwScGttv2ebAjaaw7FHn96VU7s13/+1R5pSFvDz+3She7vEX025ONV7+OvrNxf
24WundmZ3gvssTDbnrPfqQmlb0jyoLHxtvzPkBG57/YuEvZ2SdhIopQqBR4CLgTOAq5VSZ3iJ6/cTsFWCFdPDZXPrghfO0kuf54gLxUzTzb/rtThKKerNT9XwDITW2p+925v9xstx918Et+clVfEJ6jyE3W1OD7iTN8c+R41dwMMPpVdfsf2etUazfozZZ6c8HMBCAfk6IVnkOTbN9zLtRSZWToqH/d4Kn3av59/t5ZNKDfxsYK3Wej2AUupl4HJI2AECM6yvjOL5taH5RLiJQon5RHo5cfabo31pMXuq409uUuxhWlIwyhGuZZVZeniEsduW6kjBbJHoeoH/uWbsLqpU5/GOpGueb6fz3MocxVlcqCK18InvreKFec1fTHtrjqBUs6iEy+VlmlonrC406wvE/rx8/6m5rn3caw4fpvWij+MeTyQMYTtatyyKmXbh9ndW8MB0d3ekW/7Jxm9upPf23ANs2neYC+/zZ0uq9oexf/WVd2rN+j01nr8K2pQUJZyMSyXbIKKU+jYwVmt9vbn9I2CY1vq/beFuBG4EKOl28uCf3vkYp3csZNrGBmoaNCENe2s1g7oWUlqsWLm3ifomaGhqYvthxZiTirj6Ky3iin5Ia15b08DFvYopa9Ecprq6mrKyZt/XoXrNtI0NXHlqceTkzdveSGkRnNmlKCbNl5fX8I0+rWnbQrG9OsSiXY3061xI5f4QbVsqSgrhYL2ma2kBvdoV8MbaBnYdDnGgTnNG50JOKztCv+5G/gt3NvLMF/X84ZxWdClVvF7ZwGWnFNOqSLGvLsTfF9azry7EbwaXcHL7wij7X6s8wuKdjZzVrYhv9i72/PKzl98vycRftLORd9c38LMBLflsy2F6dijhwUX13DG8FXVNmi92N9EQIuoaeMl/c1WI5XuaOFCvOVgf4tq+LWmZYJh85f4mvtxby5jeZRG7lIJBXYt4Z90RjjRBu5aKr5/U/Nn6RuURerYpYNKaI9Q3we8HhejSvozNVSGmb2zg4y2NDOkWKxY92xRQ26BZsz/ELUNLmPplA2NOKqagocbx/G042MS6gyFGn2jk/fGWBo5vXcCpHQppDGkmrTlCQwgu7N7AnD0t2Fod4sf9WtK6WDHlyyNM39jIye3cXxBNjY0UFkXf1+sPhthXpxncrZARPYoY2LWISWuO8O56Q6S/0rGAnTWa3wwpob06zCFKmb+jkZ01IeZsN2r9TufAa/5+SBS/ZaHie6e1oG1L4x7454p6DjdoGkPQoUTRrrCBelrwzvoGfnh6C1bt8//F4tf+xhBsrwlxQpsCupYW8N6GI5x9XBFXndaC9iXN12vt/ibm7Wjk6q+0iHkGluxqpDEEQ44ritz/Ye2597qLFmqth8RkbEw87v8P+DaG3zu8/SPg4URx+vTpo70yY8YMz2ElvsSX+BI/qLyDSCPo+MAC7aCpqXSU2wpY197qae4TBEEQMkAqAv4ZcKpSqpdSqgVwFfB2MGYJgiAIbiTtpNJaNyql/ht4H6Mb4bNa6+WBWSYIgiAkJKV+4FrrKcCUgGwRBEEQfJD3Q+kFQRCOVUTABUEQ8hQRcEEQhDxFBFwQBCFPSXokZlKZKVUFrPYYvB1wMIXssh2/M7Ani/lnO76UX8qfrfKnansQaQRd/tO01m1iQjmN7knXH3FGE8UJ+2SKeWU7vuey5qj9Un4pf16WP1Xbc7H88dLLZRfKO3keP1Wybb+UP7tk2/58Ln8QtudF+TPtQlmgnSZkOQo5lsrqhJRfyi/lD6788dLLdA38yQznl02OpbI6IeU/tpHyZyC9jNbABUEQhODIZR+4IAiCkAARcEEQhDxFBNwjSqkTlFIzlFIrlFLLlVI3m/s7KqU+UEpVmv87mPu/opSao5SqV0rd4pZOrhNg+UuUUvOVUp+b6fwlW2XyQ1Dlt6RXqJRarJR6N9NlSYYgy6+U2qCU+kIptUQptSAb5fFLwOVvr5SapJRapZRaqZQ6N2m7xAf
uDaVUd6C71nqRUqoNsBC4ArgW2Ke1nqiUmgB00FrfqpTqCpxkhtmvtb43UTpaa9e1RLNJgOVXQGutdbVSqhj4BLhZaz0344XyQVDlt6T3a2AI0FZr/Y3MlSQ5giy/UmoDMERrncpAl4wScPmfB2ZprZ9WxloKpVrrA8nYJTVwj2itt2utF5m/q4CVQA+MhZyfN4M9j3HB0Frv0lp/BjR4TCenCbD8WmtdbW4Wm385X4sIqvwASqmewDjg6fRbHgxBlj8fCar8Sql2wHnAM2a4I8mKN4iAJ4VSqhwYBMwDummtt5uHdgDdkkwnb0i1/Kb7YAmwC/hAa31MlR+4H/gtEHIJl5MEUH4NTFNKLVTGoud5RYrl7wXsBv5hutCeVkq1TtYWEXCfKKXKgNeAX2qtD1mPacMf5ak2mSidXCaI8mutm7TWAzHWUT1bKdUvHbamg1TLr5T6BrBLa70wfVamj4Du/69prc8CLgZ+rpQ6L3hL00MA5S8CzgIe01oPAmqACcnaIwLuA9Nn+xrwgtb6dXP3TtM/FvaT7UoynZwnqPKHMT8dZwBjAzY1LQRU/uHAZaYf+GXgAqXUv9NkcqAEdf211lvN/7uAN4Cz02NxsARU/i3AFstX5yQMQU8KEXCPmI1vzwArtdb3WQ69DVxj/r4GeCvJdHKaAMvfRSnV3vzdCrgQWBW4wQETVPm11rdprXtqrcsxFgL/SGv9wzSYHCgBXv/WZiMgputgDLAseIuDJcDrvwPYrJQ6zdw1Gki+A4NOcdauY+UP+BrG59FSYIn5dwnQCfgQqASmAx3N8MdhvG0PAQfM323jpZPt8mWw/P2BxWY6y4A/ZrtsmSy/Lc2RwLvZLluGr//JwOfm33Lg99kuW6avPzAQWGCm9SZGz5Wk7JJuhIIgCHmKuFAEQRDyFBFwQRCEPEUEXBAEIU8RARcEQchTRMAFQRDyFBFwQRCEPEUEXBAEIU/5//7z4DSVnDUiAAAAAElFTkSuQmCC\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXAAAAEICAYAAABGaK+TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAA3DElEQVR4nO2deZxVxZX4v6cbkKXZbRdEaRZBBAFlNQR9xAWiiWbGJBMd89PJQmY+MxkniYiZZGJiJo4zmZjFzGhciDqJRlyiERABpUVlk2aRZu0GGmi2hm6g+/W+1O+Pt/Tb333v3bfc7vP9fN7n3Vu3TtWpunXPrXtuVV0xxqAoiqI4j7xsK6AoiqIkhxpwRVEUh6IGXFEUxaGoAVcURXEoasAVRVEcihpwRVEUh6IGXFEUxaGoAVcURXEoasCVqIhIhYg0iog74DdMRM4Tkf8QkcPe42UislBEJET+cyKySUTqRaRaRP4oIsMDjt8rIu0BaR8Ukd+LyNiQdL4uIntEpE5ETorIchHpH0f3uSKyRkTOiUhFAmUu8so1ePO8MeT4d0TkhIjUishiETkv4NhPRWSHiLSJyI8TyPNiEXlWRI57y7hHRH4iIv0C4oiIHBCRXQmk+2URWectS7FVOcU5qAFX4vF5Y0xBwO8Y8ApwA3AL0B/4KrAA+LVPSES+CLwI/Ao4H5gANAMfisjggPTXG2MKgIHAjUAjUCIiE73pXA88AtxpjOkPjAdetqB3PbAYWJhgeV8CtgJDgR8Ar4pIoVeXecCD3rKPAEYBPwmQLQceAJZZzUxEhgDrgT7Atd4y3gQMAkYHRL0OuAAYJSLTLSZfg6f+H7Wqj+IwjDH601/EH1AB3BgSdgPQBFwaEj4TaAfGAAIcAh4IiZMHlAIPe/fvBT6MkO9S4FXv9v3AGymU4UagwmLcsXhuMv0Dwj4A/t67/SLwSEhdnIiQzh+AH1vM89+BHUBenHiLgT8CrwO/TbAOvgEUZ7s96c/+n/bAlUS5CdhojDkSGGiM2QhU4jFq44DL8PTUA+N0AK9504jF68Ac7/ZGYJ7XpTA70GWRBiYAB4wxdQFh273hvuPbQ45dKCJDU8jzRuB1b91ERET6Al/EY8D/CHxFRHqlkKfSRVADrsTjDRE56/29gccdcjxK3OPe4+cH7EeLE4tjwBAAY8wHwF8D1+BxTVSLyGMikp9QKaxRAJwLCTuHx00U6bhvO6Y/Pg5DiV6fPv4az5PBSjx10BO4NYU8lS6CGnAlHl8wxgzy/r4AnAYujhL3Yu/x0wH70eLE4hI8/lsAjDFvG2M+j8eo347H9fINqwVIADcwICRsAFAX5bhvu47kqSZ6ffq4B1hijGkzxjTheYq5J4U8lS6CGnAlUVYDM0Xk0sBAEZkJXAq8B+zF4075UkicPOAO4N04efwVHt9zEMaYDmPMu948JiZbgBjsxPOSMLBHPdkb7js+OeTYSWNMdQp5rgb+yls3YXhH7XwGuNs7+uUEHnfKLSIS70lG6eKoAVcSwhizGo8Bfk1EJohIvojMwvPi7gljTJkxxuB5+fhDEblLRHqLyEXAM3h6rb8MTdebzkgReRxw4R3dISK3i8hXRGSwdyjdDOB6YEMsPUUkT0R643E3iFeHmH5jY8w+YBvwkDf+XwGT8PR4AV4Avi4iV4rIIOCHwHMBefb05pkH9PCmEc/V8xieOnleREZ407nE6yaahGeEzz487xWmeH9j8dwg74xTB/lefXoAeV59esbRR3ES2X6Lqr/c/RFhFIo3vDfwn8ARPMP+yvEMr8sLiXc78DGeIX01eIboXRpw/F48I1fc3jiHgOeB8QFxrsNzwziNx1Wxj5DRLVF0dwEm5FdsQa4IKPaWa29o+YHvAieBWuD3wHkBx56LkOe9FvIchmeUyQlvGfcADwF9vdvfjiDzALA5Trr3RtDnuWy3K/3Z9xPviVYURVEchrpQFEVRHIoacMWxiMhOCZ7m7/v9bQyZOVFk3Gn
U88koeT5pQ9oRyyIic+JLK05HXSiKoigOpUcmMxs0aJAZM2aMpbj19fX069cvfkSVV3mVV/kuLl9SUnLaGFMYFjGTb0zHjh1rrLJmzRrLcVVe5VVe5buyPFFGHKkPXFEUxaGoAVcURXEoasAVRVEcSkZfYkaitbWVyspKmpqagsIHDhzI7t27k05X5cPle/fuzfDhw+nZU2dTK0pXIOsGvLKykv79+1NUVIQEfJGrrq6O/v2TX6VT5YPljTFUV1dTWVnJyJEjk05XUZTcIesulKamJoYOHRpkvBX7ERGGDh0a9qSjKIpzyboBB9R4ZwitZ0XpWuSEAVcyT0tbB0s2H6GjQ2fiKopT6fYG/OzZs/zv//5vUrK33HILZ8+etVehGHzqU5+yLa0nivfzwKuf8NYnx2xLU1GUzKIGPIYBb2triym7fPlyBg0alAatIuuxbt0629I87W4G4Fxjq21pKoqSWbq9AX/wwQfZv38/U6ZMYeHChRQXFzNnzhz+5m/+hiuvvBKAL3zhC0ydOpUJEybw1FNP+WWLioo4ffo0FRUVjB8/nm9+85tMmDCBm2++mcbGxrC8XnnlFSZOnMjkyZO57rrrAGhvb2fhwoVMnz6dSZMm8bvf/Q6ADz74gDlz5nDbbbf59SgoKPCn9fOf/9wv89BDDwGe9RNuvfVWJk+ezMyZM3n55ZfTU2mKouQEWR9GGMhP3trJrmO1gMew5ecn/+Fxn/yVwwbw0OcnRI336KOPUlpayrZt2wAoLi5my5YtbNiwgauuugqAxYsXM2TIEBobG5k+fTp33HEHQ4cODUqnrKyMl156iaeffpovf/nLvPnmm3zzm98MivPwww/zzjvvcMkll/hdL88++ywDBw7k448/prm5mdmzZ3PzzTcDsGXLFkpLS8OG/a1cuZKysjI2bdqEMYbbbruNtWvXcurUKYYNG8ayZcuoq6ujo6Mjbj3pYpSK4ly6fQ88EjNmzKCoqMi//5vf/IbJkycza9Ysjhw5QllZWZjMyJEjmTJlCgBTp07l8OHDYXFmz57Nvffey9NPP017ezvgMcYvvPACU6ZMYebMmVRXV/vTnzFjRsQx2ytXrmTlypVcffXVXHPNNezZs4eysjKuuuoqVq1axaJFi1i3bh0DBw60oTYURclVcqoHHthTzuZEmMBlHIuLi1m9ejXr16+nb9++uFyuiGOpzzvvPP92fn5+RP/5k08+ycaNG1m2bBlTp06lpKQEYwyPP/448+bNC4q7fPnyqMtRGmP4/ve/z7e+9a2wY1u2bGH58uX89Kc/ZePGjfzoRz+KWVYdWagozqXb98D79+9PXV1d1OPnzp1j8ODB9O3blz179rBhQ8yPocdk//79zJw5k4cffpjCwkKOHDnCvHnzeOKJJ2ht9bxM3LdvH/X19THTmTdvHosXL8bt9nxE5ujRo1RVVXHs2DH69u3L3XffzT//8z+zZcuWpHVVFCX3yakeeDYYOnQos2fPZuLEiXz2s5/l1ltvDTo+f/58nnzyScaPH8+4ceOYNWtW0nktXLiQsrIyjDHccMMNTJ48mUmTJlFRUcE111yDMYbCwkLeeOONmOncfPPN7N69m2uvvRbwvNz8wx/+QHl5OQsXLiQvL4+8vLygF67RUB+4ojiXbm/AAV588cWgfZfL5e+Vn3feebz99tsR5SoqKgA4//zzKS0t9Yfff//9EXv1r7/+eliYiPDII4/wyCOPBIXPmTOHW265JSjM1+MGuO+++7jvvvuCjo8ePdrvionnQlLXiaI4n27vQumuaM9bUZxPXAMuIotFpEpESiMc+56IGBE5Pz3qKYqiKNGw0gN/DpgfGigilwI3A+Hj5RLEaHcwIwTWs7pQFMX5xDXgxpi1QE2EQ78EHgBSsr69e/emurpajXia8a0H3rt372yroiiKTYgVwykiRcBSY8xE7/7twGeMMfeJSAUwzRhzOorsAmABQGFh4dQlS5aEHqdfv35hsy6NMSk
tf6ry4fLt7e3U19djjOGFXc28d7iNu8f34sYR4V/ocbvdQVP3E0XlVV7l7ZOfO3duiTFmWljESJ+qD/0BRUCpd7svsBEY6N2vAM63ks7YsWONVdasWWM5rsonLv/DP+8wIxYtNc99dDAr+au8yqu8dXlgs4lgU5MZhTIaGAls9/a+hwNbROSiJNJSsoT6wBXF+SQ8DtwYswO4wLcfz4WiKIqipAcrwwhfAtYD40SkUkS+nn61FEVRlHjE7YEbY+6Mc7zINm0URVEUy+hMTEVRFIeiBlxRFMWhqAHv5hidQKUojkUNuKIoikNRA97NSWW2p6Io2UUNeDdHXSiK4lzUgHczmlrbae8waL9bUZyPfpGnm3HFv63ANa6Qy4b0zbYqiqKkiPbAuyHFe0/5t9UHrijORQ14N0d94IriXNSAd1O0360ozkcNeDdF+92K4nzUgCuKojgUNeDdFHWhKIrzUQOuKIriUNSAK4qiOBQ14IqiKA5FDbiiKIpDsfJNzMUiUiUipQFhPxeRPSLyiYj8WUQGpVVLRVEUJQwrPfDngPkhYauAicaYScA+4Ps266VkCB0PrijOJa4BN8asBWpCwlYaY9q8uxuA4WnQTVEURYmBWFkLQ0SKgKXGmIkRjr0FvGyM+UMU2QXAAoDCwsKpS5YssaSY2+2moKDAUlyVty5/74p6AG64rAfvHm7jb8f34qYRPTOWv8qrvMonLj937twSY8y0sIjGmLg/oAgojRD+A+DPeG8E8X5jx441VlmzZo3luCpvXX7EoqVmxKKl5kdv7DAjFi01v//wQEbzV3mVV/nE5YHNJoJNTXo9cBG5F/gccIM3A8WB6IlTFOeSlAEXkfnAA8D1xpgGe1VSMoGuA64ozsfKMMKXgPXAOBGpFJGvA78F+gOrRGSbiDyZZj0Vm9GHJkVxPnF74MaYOyMEP5sGXRRFUZQE0JmY3RR1oSiK81EDriiK4lDUgHdT1AeuKM5HDbiiKIpDUQPeTVEfuKI4HzXgiqIoDkUNuKIoikNRA64oiuJQ1IAriqI4FDXgiqIoDkUNeDdHh4MrinNRA64oiuJQ1IB3c3Q4uKI4FzXg3Rx1oSiKc1EDriiK4lDUgCuKojgUNeDdHPWBK4pzsfJJtcUiUiUipQFhQ0RklYiUef8Hp1dNJV2oD1xRnIuVHvhzwPyQsAeBd40xlwPvevcVRVGUDBLXgBtj1gI1IcG3A897t58HvmCvWoqSHOVVdSz5+Ei21VCUjCBWvswiIkXAUmPMRO/+WWPMIO+2AGd8+xFkFwALAAoLC6cuWbLEkmJut5uCggJLcVXeuvy9K+oBuPGyHqw+3MZdV/Ti5qKeGcs/3fJ/t6IeA/z208aR+qu8ykeSnzt3bokxZlpYRGNM3B9QBJQG7J8NOX7GSjpjx441VlmzZo3luCpvXX7EoqVmxKKl5qE3S82IRUvNsx8cyGj+6Zb3lc+p+qu8ykeSBzabCDY12VEoJ0XkYgDvf1WS6SiKoihJkqwB/wtwj3f7HuBNe9RRFEVRrGJlGOFLwHpgnIhUisjXgUeBm0SkDLjRu68oiqJkkB7xIhhj7oxy6AabdVEURVESQGdiKoqiOBQ14IqiKA5FDbiiKIpDUQOuKIriUNSAK4qiOBQ14N0cXYxQUZyLowz4rmO1HKquz7YaXYKuvg54ZV0HB065s62GoqQVRxnwW37zAdf/vDjbanQJuvo64D/8qJHP/OL9bKuhKGnFUQZcsZ8u3hFXlC6NGvBuThfviCtKl0YNeDelq/vAFaU7oAa8m9LVfeCK0h1QA64oiuJQ1IB3U9SFoijORw24oiiKQ1EDriiK4lDUgCuKojiUlAy4iHxHRHaKSKmIvCQive1STFEURYlN0gZcRC4B/hmYZoyZCOQDX7FLMUVRFCU2qbpQegB9RKQH0Bc4lrpKzqG9w3CkpiHbaqSE0QHhiuJYJJULWETuA34GNAIrjTF/GyHOAmABQGFh4dQlS5ZYStv
tdlNQUBAUdu8Kz0qEz83vl5R8IliRf2VvC8sOtvKL6/swtE/wvTAT+Scj76vDm0b0YNWhNu68ohfzinpmLP90y/vK58NKW7Ezf5VX+XTIz507t8QYMy0sojEmqR8wGHgPKAR6Am8Ad8eSGTt2rLHKmjVrwsJGLFpqRixamrR8IliR//zjH5gRi5aarYfPZCX/ZOR9dfjjv5SaEYuWmmc+OJDR/NMt7ytfIm3FzvxVXuXTIQ9sNhFsaioulBuBg8aYU8aYVuB14FMppOc4dC6MoijZJBUDfhiYJSJ9RUSAG4Dd9qjlLIyD/chO1l1RujtJG3BjzEbgVWALsMOb1lM26eUMvPPR1QQqipINeqQibIx5CHjIJl0cR1dwoYguiqIojkVnYtqAk70Q6kJRFOeiBjwFnNx5lS7x/KAo3Rs14LagvVhFUTKPGvAU8PVhneiFMHrTURTHowY8BZz2AlD93YrStVADbgNONIvqA1cU56MGPAXUBCqKkk2yasBb2zsoenAZj63aFzfuT5fuyoBGyWGnZ6LowWX804tb7EswAPWgKLnG6H9dzl1Pb8i2Go4lqwa8ua0DgGc/OBA37rMfHky3OgmTLhf40k+OpydhRckx2jsM6/ZXZ1sNx6IuFBvQl4OKomQDNeAp4HsRqOZbUZRsoAY8FRz2FlNvNIrStcgJA+50w6IeFEVRskFOGHCn4rAOuKIoXYycMOBON4ROmZauL1sVpWuREwbcqThsJn0QTtZdURQPasC7KdoZVxTnowbcDtQYKoqSBVIy4CIySEReFZE9IrJbRK61SzEn4LQFofQ+oyhdi5S+iQn8GlhhjPmiiPQC+iaTiNMNixP1Vx+4ojifpHvgIjIQuA54FsAY02KMOZtIGqmMiqhtamXav6+m5FBN0mkkw8cVNUz/2WrqmlqDjOCRmgamPLySQ9X1GdUnWWKtLTP/V2v58GhrBrXpnjS0tDHzkdWsKz8d8fiHZaeZ+chqGlvaM6xZ8hw928iUh1dy3N2RbVXSxn+/s5e//7+SbKsBgCRrREVkCvAUsAuYDJQA9xlj6kPiLQAWABQWFk5dsmSJ/1hjm+EfVjdwXj787qZ+Qem73W4KCgr8+/eu6Ez2ufn92HGqjV+UNDNhaB4Lp/cJ0y9UPlGiyT+6qZE9NR0smt6bv+xvYXdNBwun9ab8bDt/Lm/l86N7csflvZLO31fO337a2K5/W4fhGysbgsK+Mq4X80f2jKjDc/ODz0mq+WdCPrCdQPJlyIT++8+289MNTYwamMePrg1uw263m//ans/hug5+fG1vigbm255/OuSXH2xhyd5WbrjE8NWr4stHa2vZ0t+KvJXrw+78586dW2KMmRYaLxUXSg/gGuDbxpiNIvJr4EHg3wIjGWOewmPoGTdunHG5XP5jtU2tsHol+fn5BIYDFBcXB4etWObfdLlc5O07BSWbGDJkCC7XzDDlwuQTJJr8k/vWQ00Nk6dM5sMz5VBTzaTJk2g/fBbK91E0YgQu17jk8/eWs6CgwHb9W9o6YOXbQWFjxozGNWdURB3SUX9plw9oJ5B8GTKh/8DDZ2DDOvoPGIDLNTtMvn//fKirZdq0aUy8ZKDt+adDfq/sh7176NmzlzX5KG0ta+3HiryF6yNT+qfyErMSqDTGbPTuv4rHoFvG1/lPxh3rc19kbTiccd5LTEVRuhZJG3BjzAngiIiM8wbdgMedkhE6VwLM/itEHVOtpBNtX0o0Uh2F8m3gj94RKAeAv0tI2oaGmc3G7bSRHLlws1Os47T2pWSelAy4MWYbEOZYTzidJGSy5UKJ5DZRs6ikE73xKtHI6kzMVBpmtjonkXTuSotEdaWy5DrxatrvJtRTokTBuVPpfT3wLPZOpAs846pxyF26QPNS0kx2e+ApGI9s9U6c7EKxUldq0DOH2mclVZzbA/eSTXvTFS5A7eXlPnpPVaKRZR948vgNTy607lzQwSa6UFEcj95blXhk2YXiMRdJTeT
xpZElk2PoGr1XdZkoinPJCRdKfUs73399Bw0tbZZlcukF4ssfH/FvP/5eOW3tzlvIp7zKzSPLd0cdhdLU6jlHZxta/GGPrdzL0k+O8bNlu3T0ShqJVrfPfXSQtftOpTXvP248xLu7TyYsZ4zhoTdLOXa2MQ1aKT5SnciTEoHN8qVNhxl5fl8WXDc6sTSyZDeEzqeAFTtPcOWwAf5jmw5mdoVEO/i75zZxpKaRu2ZcFvH4KyWVvLTpMD3zhYdvn0hHh+E375X7j989awQjhia/+FV3JG7TFd9s48j8+C3PxOeKR2+1TadQfvDn0qTy2Humg3cOHaKsys2L35yVDtUUcqQH7iMRY+yfyJMeVeLicaHkzlNAqnTEeWjw9QJ950j72+nHya3L1z70wSy9OHgYYXawZLNz9MqzNIww7nGfIdcrU1GyTU71wBOhcyp9FifyZC3nzNOdypprOPle2YUeUnOSnJpKn9jJju0fzCZOXmbW6g0xNJaTy5wt4tWYGj8lHo7tgfvIVu/EmOgXWFe+8Pw+8Fy8cyo5g7aPzJBdA56KDzzLLzG9WiQQmn2sjJmPGqMr35VyHudZQ5/G2mzSi3NnYtqmRZL5x1DAyaNTovacQg7oEqepE381Qm88B1e1utbSi+NdKNlq3elwoThpZIeDVHUsTu4IKJnBucMI40xyUMKxVt9RIqkxyRpObuPabNJLygZcRPJFZKuILE05rQQet3Lh8TKatsm2WSf3avVCtR+tUiUedvTA7wN2JyOY0hd5stS6reSbtAslOTFbiXcT0Rl2ihWc5A50MikZcBEZDtwKPGMlvrvVBC2G9FpJZcR4NfUtfFDZGjvvgK/Sv7/vFHtO1IbFeWXzEWrqW8LCk2VF6QkqTjcA8OKmQzS0tMeVqapt4o2tR1mzp4qyk3X+8IaWNv6w4VDUhv7xiTaO1DTgbm7jjxsjxws8VtvUykubDofFe2fnCSpO1wPwdumJsDSs3kSt3JPW7K3i7R3HLaVnF+0dhhfWV9DcFv9cJMryHcc5UtNga5plJ+tYs6cKgFN1zZZkrNjC9/acpLyqLn7EAMqr3JYXqop2rQbS0tbBC+sPxYxT3xy73Udi9a6T7D/lDgs/7W7mzqc2JHyNbzpYw9bDZ/z7B065WbWrsx4OVdez7lgb/2dBT2MMiz88yJPv76ejw56b1hPF+3mnwmP//ry1kqq6pqhxU13M6lfAA0D/aBFEZAGwAKDXRWP4f//7Ht+d1pvTjR389/vBK5WV799Pccdh/nNTI7trOhi3/D0u6Bt+jykuLqbinOeCratzc8/iTQA8N79zMaWDp9z8ZMUnjB+Sx6IZfRIumNvtpri4OCjs71fU+7eX7wg2hgcPHvRvb926lYt6NFJcXMwPP2yg0t15Yn06PrezmeIjbdQcKWNSoec0tAc0gP/Z1szzO9cw5YIefHi0jdrKcsYPzQ/K89kdzXxwtI1zleUUH2ll04l26o+WMWZwvl//b62oJ1/g2Xn9uD9Afx/79x+guOMITU2eRrJp08f+Y4Hl33fY06COHTtGcXE1zW3BjXXDhg0sXNvoL2Ok+ksEq/IfHm3lmR0tbC7dx19f3ivseLI6uN1u/mnFFvr0gCduTHyRrmj63+s9B8/N78e3vNsnqs+FxXW73dTWes731q1baTgUfO4DKS4u5msB6cbKP5ou8fT/3ivboWofQ/tE7/O9Wd7C0bOedtLe0QEIZ2rOBKWzuLSZtZVtnK0sZ+L5nWWKVH5f2Dei6Pntd+upa4XPPbaaR+b0jal/IKHlDt3/xjv1tBngk1KqDpUx9cLIZrK4uJjtp9r4ZYnnRlxdeYDZl/SMm38smtoM/7na02m49p01fGdNAyMHRK/zpA24iHwOqDLGlIiIK1o8Y8xTwFMA5118uWnv1Q+Xaw4HTrnh/feD4o4ZPRrXdaP4aUkxUM8106Yz5gLvvWHFMn88l8vFJ5VnYf1H9OtXALW1/nAfR996D2ikrUd
fXK7rEy5fcXFxUHqhOoQycuRIKN8HwNRrruHcge24XC7ca1cCnU8TvjT/dKQEOMHlV0zAddXFALS2d8DKt/1x3a3Qs2AwcIpxEybiuuLCoDxfqPgYqGLs+Il8UH0QqObKqybzqTHnd+q/YhntBv92KL46773hPWhqZNr06fDR2iBdASo3HIJdpQwbNgyX6yrqm9tg9Tv+47NmzYK1a/xyEesvAazKV3x0EHbsYtAFw3C5JoaVMVkdPBdfPY1tyaURVX+vfoHno1fv3mFxi4uLGTiwF5w5w5QpU5g5aqiltHzpWKq/EJmI+gfU57QZM2OuOPl+3U4orwAgLy8PMAwZOgSXa4Y/zouHNwMnGXPFBFwTL4qqQ5D+UeLUecPPtebFlg8lNL2Q/baAMo8eOx7X1ZdElXd/cgxKtgJw6ajLcV1bFD//GNQ2tcLqlQDMmHUtrHmXetMzavxUXCizgdtEpAL4E/AZEflDCuklhXraUnsf4PTx3F15qF2nm9C5hJ6dXFjDyClYuTaTNuDGmO8bY4YbY4qArwDvGWPuji/n+Y904SVyLeby+Q8sWyLmxY4ypZqEVYOew9Xfdcixe5Md7bMr3JQCSacdspJ2xseB213eXLyTJz2M0IZROemujs61UHKv3pX0ksgZjxY3He00m09h6bwKrKRtyxd5jDHFQLEdaVnOM5OZ5SiBRjTZNhw69j7ahZXrnoqufD/JlbLFH5ERHhbabvK8AR02FirHm2bKxKqpzPfAbW6NudK4AwlstIn0DpItS2AWifbiQ+NbvbDClpPN4XH5TiXXimbHpeY7X3Ya8GySzidRK2k7di0U/ye+crAvno0FfPy+xRSrI7588KfVrMspTiehc+x71xUSnBZ3R67d6WwiJ33gPlK9c+WyvUj2ppJolcT9/Fla37CkL+lksPtGnks+/tzppFjXo3M52WDrmo4lMLqo/fYTq64c2wP3kUPXmZ9M6xStxx+oh1WD5LRH23RdvLlQC37blwvKJEg0lfP86/g7sFAZIPiajR8/Cz5wz3+qj1K5/HX0ZHWyZRRKQFigMbZql6O+xAx92Rmiq8PsviPItSF3yZzjaC6Ujo7U9fGRzfqxvd0HGvB0jgNPFbseUXPlUTdabzfebSpQ+0hFiVW6eEVPZmkGyy8xQ33gOWNm7CFHmhWQO7rY8hLTxrT8ZLF+bHfdkVinK+MG/MiZBl4rqUzKuPh4fl0FmytqwsL/b30FZ+pbOHi6njPN9lZsvIVqgio+RrzGlna2Hznr399ccYZD1fXsOha+GBfAtoC44JlqW3r0XFBY5ZkGjp/zrGUSePOwqlMggfGOnW3kcHUDh6sbOH6u0Zu+51hrSBdqS8DiQD52H68NWrwMYP3+ak7WNnGkpoGjZxvDZOLqZwwbD1SH3bijNfaSQ8F6dXQYNh0Mbzud8WtoabOxe5gE7R2GfWfaKfMuTnXa3Uy1u9m/GNqpumbKq8IXd4pEyaEz/vIcrm7gWIQ633uijpJDNZ6lHLyUn2kPWyCstrGVnceC215jS7u/jQadk4DN/afc7DpWS3lVXec6/jbelSKl5GsnOyrPeZZ9ANraOyLaDR8fV9QErUdkhf1V4esLAZxu7AhbCK306DnqmiIv0rfpYHjebf5rLLpOtowDT4SGlna+98p2fvGlyWHHrLpVHvrLTv92YNH+7c2d/NubO8MFbGDxRwdjHg+s+8C2GVqk+1/ZzrIdx5leNNifri/t0p/MC0v3bEPwCf/a7z9m86EzHPyPW/xhgWWO1qOPdsGEjybpDPjUo+9FlAH473f2Bu1/5+XtYXE+++sPGHV+P9673wVAU2s7dz69gasvG8TWw2cBqHj01qh5ROIv249x35+28V9fnMSXp10adxzhHU+sY/V3r2fMBQUAPPPhAR5ZvofnvzaD68cWBsXde6KOO55Yz72fKuLTBQmplTCRbng+fvteOb/c2LkC3b+8vI0BvXtQ29RGxaO3MvvR92hpj3+
TKa+q444n1nHPtSP4ye0Tue7nawBPnQeu7jfvV561bxZcN4p/vWU8B065+feNTRxkV1B69yzeRH1Le9A5W/jqdpZ+cpyPf3BjRB1E4IZfdK559KWpwwF7nyoite2PjrXxzDsbALhubCEvfG0Gv1i1jyeK90dN50tPrud7N41NKO/frimPGH7/+43w/hp/XbV3GD73+IfMGDmEJd+6Nijuuv2nuevpjSycN467ZlzmD39s1b64+WfNhVIVYSnNpO7KGXp8itfjsaq7r7fS2Bq+/KmVNDZ7e5RWegpBPnBL2lm/sHYft7Z06YHTnT0UXw/PZ7yT4XB1Q9C/Fc41dj4F+HpMkXqivmVJdx+P/DRkJ1W1ne0/1B+8L8KysLVNbf5tK8YboKbec/PfFaE8ZyIsweor95mGyHL1EZZP9rfnkGPRmlE6xoFHSqmqoTN0m/dmufdE/DYbadlaO/Bd26FPhADHz3pu1vur3EFl2WNB3+z5wG2yvDniHgxpkOnTyncBtEe7AKI8CViIHnE/LH4KRbOjVlJNw8pUbmNDPt2FzkEJ0WKEDiO0/8VsxBmggdsJDJjIS9PMMJ+KkTppwU/NOe4D9xF52m3ilZepl5iJvDBMViUrYnkBb/HjxQ/ugVt1oVhQIkZ6MWVsPFWhTSV20gGLi8UYxhY0ozWDFjzlOREZfsuZSH6h5ykda6HEa4u+PK1Yl3Stq2KpvBJ5FFlOjgPPldEjVon3yBf4kjPZkhkLT8a+cbRtHR1R7uadYVbexyQ6lT6VCzAd5zzxy82eGau5hE0fgkkqP985tWr3Oj9Gbp/SkcoftJxFAmnlpcd+JzW6y4pM1gy4XY0uV65Dq2OuYxmxWCfM9+iZyDhaK49joeHxGk1KLhQbTlaqaUQaLx+eSWbbVap5ZXryVfBIJw+hPdf4PvD06OPPJyjP7LtQfEQbMdN5PKADZqVDZ4NOtpFM1eVKTypSr8RD5IYdSe+ofm06T6wvtbYoZzfRmVwRMrIWLUvvm0PrwR8eI/HAa9Lfw4px0Wd6THuqbTjjBjxw2+cDJ/L5DT1Pseo/XfiytGKb89LUBfdPPIxQbP9NMNqM6hjp5pQPPDl7kyEfeJzj0UZ8hPlqY5xIKx9FjfcSM9ojWPQeuAn6t9oziv+yM4J7x4aL1p+ESOBfTIJ6Y/iWM40Qzz9Gmdx5tLOA/bYwtu81eLi3ZydPxFLnIVb9J0vk92mB234THjetdLlQYtop300w2l0wBll0odhzBnOmB25Di4zVA/fhe8SzNoywc9vyS8w4LSiVL/bkwqmy+kmvTOqa6rWQ6Wsgkfcs4euBe9NI8zDCYBeK9bTSNgoliY5RTvvAI+FkF0pr4EvMmDfb6AetGOV4Bjzo8TaBablBvU8LJHMB2uID9/4n0lYifeIu4kUf4B/v3j7w2E83kXra0dp1qFvA/w4nkz5wf97x08pPlwsl5rFOt2CiLtCkDbiIXCoia0Rkl4jsFJH7EpHPEbtrmXiV2RYwuSKWke5s8OFYeWnhfwlk6SWmhThx9pMl0gVqp7sr/GK0lnasG1Wne9ZZPvBcuJYSLYOdOke8GQS5UMKCopKud5hW2pRI6NNNsHszEqlMpW8DvmeM2SIi/YESEVlljNkVTzCeUp1xUtDOZuLp29oe5K9IKr1YLhRfT8bXA/cMI4ydh5WZmImOQrFKRBdKGs5noh/PiDUTMFvjwFMl0y8xI01aC9UhmkaZ+ip9YKtIxC2SLheK5XdLQe+w4sdP5av0x40xW7zbdcBu4BKr8pGmU4tAVV0TTa2e7mVFdQN7TtTSFGHauY/QxZJCae8wHK5u4GxDS1obzcnazrUrTrmbaW43nGtsDfON+5YQqG9pI5T2GN3qmvoWyqvcuL0L87ib2zhUE76QTuBLyZqA6dJ7T0SeHt7Q2sbRs43+haUqz8ReYOpMQytHahpiTueudjfjbuks9+HqBto7jH/BrUAqTtfjbm7jtLuZU3XN1HoX+2l
pN0HTs5ta23E3t3HUq19jazun3c2cdndOSY+2CNXxs43+J6RIBr+lrYP9p9xB09tNyHE7CGx/gdP7wdNW2to7aGptD5uWHsjB0+HnvLKm85ydrG0Ku17ONrQGleFkbRMNUfJoam336xZNjxPnmqhraqWhpc1vZHZUngvK13sJh/VozzVGXszp6NlGv44tbR2caergtLs5KP6RmgbONbZyrqGVandzmHy0VUCjmeRzja1hrshQ/VraO4IWoDoXsDZR6HIEvvYbGO5bEOxEQNs/fq7RH6expfN8bz181r9oHBDUtqMhdhg1ESkC1gITjTFRF5I47+LLzcX3/CpqOg99/kp+8palDnxSLJw3jn+cO8ZS3OLiYlwul3//uy9v4/WtRy3nNXpgHvvPJXbhf+fGsfxydfwFbGLxu69OZd6Ei/jRC6t4YVfsm1smGX/xAEtrjIy5oIDV372e8T9cRmNb52JX03+2mlMh6+eIdPZYvjL9Uiqq69lwIPJqc5+fPIzH77yah9/axeKPDvLDW8fzjTmjALj9fz4KWiHy6ssGcUXfBl7a46m/T40eyovfnJVQeUPbD8C7u0/y9ec3h8Ud0q8XNfUtfHHqcJZ9cjziOjmJMLhvT355XS/6jpjEl3+33rLcnMvPp+ykmxO14TfaWHmdaYhslAE+O/Ei3i49ERb+r7dcwYLrRlP04DJ/2FWXDOStb3+a+1/ZzqsllZZ18PHvX5jI3bNGAPDA71eyZK9Hr2EDe7Pu+zfwrf/bzDs7TwbJ3DXzMl7ceDhu2hWP3sq5hlYmP7wyIZ3GXljAj2+bwF1Pbww79j93XcM/vrjFUjqH/vNzJcaYaaHhKa9GKCIFwGvAv0Qy3iKyAFgA0Oui2MZzX1nklb3s4rUNZUwQaw3D7XZTXFzs3z9+0nqjBhI23gB79x9MWCaU0tJSzju1h60nmsmlj01ZXSCqvMpT743eBxTfOQg13hD8uHns+HE2VIY/1fh4a/sx7rj4HEePetIpK99Pcbvnwt1+JLhXW3uulu117fjqb93+6qC2YIXQ9gOw6mBkQ9fc4rlRJGO0InGmoRW3u4W9W7cmJldzhhO1id08WlqjG2+AU6dORQwvL99PcceRoLAdR89RXFzMqyWRl2iNx2vrdjO8yXMNtTS34Dt/zc3NFBcXU3Uq/Bp+fXN84w2edljVkPg1ve+km9eLIxvp1z/8JOH0QknJgItITzzG+4/GmNcjxTHGPAU8BZ4eeKz0Ro0aDXt2p6JSTPoPGIDLNdtS3NAe1Jsnt8Ex6z3wZLho2HA4mJoRnzBhIq6JF/HrLSuA1Hpy2cLlcsGKZZ3b4N+PxrCLL4bKIzHjuFwuPqrfBRUHGTVqFK7rR0dMu/+AAbQ31hFYf6G96XhE6oGX5x+AveHtOy+/B7RGv/kkQ0FBAVePmASbrPfABw8ZDNWnE8qnR4+eEMOIX3jBBXDyeFj4yFGjcblGh9V94LlPlKFDh+JyTQdgxcFVgOfG2KdPb1wuFy9UfAxVVUEy+fn50B7/OnG5XByqroe1xQnrNXLUKCjbGxZ+6fDhcLgi4fQCSWUUigDPAruNMY+lpIWXtkwv6pBjtFpcJjQ23bsO49G5Fkd0Mj2M0I45BNki2Reo6ZiAF1iNwRN5fMdzq57teF+ayjjw2cBXgc+IyDbv75Z4QrHItQoOJBNDy6JNj08EB9uClLB6eiwtxmVMRi24lQlcjiXGWHK7r6lo9sP34jrRr+2kGztGvCTtQjHGfIjNTtZcq+BAMqFZS1vqueTyTTAX6JzKHbueMrkeSg43+/gkqXs6OkTR6tG//ESOVbQdxjOnZmKmvYJTaDSZsIt2uFBirbXSlbFqcK10etJVddHOiZNdKMlq7umB26pK0E0h0LAlsvxE7PRTEg8j2y4U28m1O2SmscOAaw88NlZmW2bYg+LocxZ3/fiocvbrEm0R0CwsgGgJO1wouWXA013DKVRYJs590GzOJPEv75k7IwhzinR
8ESbRvENxcr8l2XrsMPY7qQJvJkFVHWcFT6vYfU3Z8fWfnDLgTn6UtAM7XmJmei1rp2H1m4zpMPC51gO0g/ifM0t8jeukdYmTaK494duxblZOGXAdRmiDC8WeWd9dFmsfNe7e7TARkr4pGZPWUSiBxtHnqsi1zzh2PRdKDhvwTJz8Vh2FkjTWhxH6euCZ94F3RbdWvHqK1h7TMdY++kckPKTiQjHG2P4E1eV64Dk9CsVGNaIRa4EoqxjjcUXl8L0wLkEfiLZ4zix9wNkY/8Ucy12XjhESvnS7HHF9UZGD09HR8N2UQ9uM78bZlsI7pg6TBg+BDXd0Wxazskq8xawyweN3Xs3nJw+LGy90KvQDr25nyWZ71qpQrHN+wXmWVmXLBJcM6sNHD37GUtxIU+mfXnuAny1P31IRXYEvTR3OKzatCdOViLaYVU71wDPBa1uSaxwThg20WRPFCrlivAH/krtK+lDjnRjdzoAnS669ALGTEUP7ZlsFRVGSQA24Rbqu+U7fdwAVRUkvasAt0oU74OR3xeERitINUANukS5sv9P2HUBFUdKLGnCLdGUfuLpQFMWZqAG3SBe23+RpK1AUR6KXrkV0erWSKtqGFLtRA26RrtwDlxz6+HFXpiu3ISU7qAG3iF57Sqo4eXkDJTdJyYCLyHwR2Ssi5SLyoF1K5SLae1IgtZfZ3XWhMSV9pPJV+nzgf4DPAlcCd4rIlXYplmuo/1IB7UUruUXSHzUGZgDlxpgDACLyJ+B2YJcdiqWL4r2nuOmx9+PGq29ooN+Wzng19S3pVCur9OmZn20VHMO8X6219MYgtP1A125DSnZIxYVyCXAkYL/SGxaEiCwQkc0ishlj6JUHfXrAhX2Fv768J381pidjBnnUKOwjXNi38/LoHWBXJp7fuXP5oDxGDexUvad38+YR4fejf5h8XtD+1AvzGSiNcX8XntcRtD+yoJ1ZF4cbupkX5fvz/+Gs3swv6hkWZ2jv6Jf8sH7Bx1yXdpbh6gsiG9aLvDIzLvIcnzA0j9tG96RfeNZ+rhiSx4V9hcI+HtnAsnxtTBMX9Q3Wo28PT1ezX08YMSCPsYPzIpY/lHxvMlcMMky/KJ9pF3bK9MiDSwqC8zm/j9C/V+f+pf3z6OWtzwv7ir+MAMMLhAlD8yKuo3zDZT2YPayH5TWW+0Touowb7Mk4sK0F0jsfBlloO5Haj68NRWKSN7/QcwCda1lPLsxn/BCPfsO9dei7Vob2Fm64rAe98jrrPxHmF3VWxsgBeRT2Eb52hfG3s1D69PCcy/69YFKhR/e7x/fib8b1YuTAYJMyKmT/iiF5XDe8R5ieUwrz6Z0f3D562PyGroc36SFRrsehvT2v88d620FBjOtpdEC5QsscicA2HkphH087H1aQxMkz3i9jJPoDvgg8E7D/VeC3sWTGjh1rrLJmzRrLcVVe5VVe5buyPLDZRLCpqdzjjgKXBuwP94YpiqIoGSAVA/4xcLmIjBSRXsBXgL/Yo5aiKIoSj6RfYhpj2kTkn4B3gHxgsTFmp22aKYqiKDFJZRQKxpjlwHKbdFEURVESQGdiKoqiOBQ14IqiKA5FDbiiKIpDUQOuKIriUMRkcIEdEakD9lqMPhA4l0J22ZY/HzidxfyzLa/l1/Jr+e3Lf5wxpn9YrEize9L1I8psoihxn0oxr2zLWy5rjuqv5dfya/lzpPzR0stlF8pbDpdPlWzrr+XPLtnWX8ufXSzln2kXymZjzLSMZZhFulNZI6Hl1/Jr+e0rf7T0Mt0DfyrD+WWT7lTWSGj5uzda/gykl9EeuKIoimIfuewDVxRFUWKgBlxRFMWhqAG3iIhcKiJrRGSXiOwUkfu84UNEZJWIlHn/B3vDrxCR9SLSLCL3x0sn17Gx/L1FZJOIbPem85NslSkR7Cp/QHr5IrJVRJZmuizJYGf5RaRCRHaIyDYR2ZyN8iSKzeUfJCKvisgeEdktItcmrZf6wK0hIhcDFxtjtohIf6A
E+AJwL1BjjHlURB4EBhtjFonIBcAIb5wzxpj/jpWOMSanvyVqY/kF6GeMcYtIT+BD4D5jzIaMFyoB7Cp/QHrfBaYBA4wxn8tcSZLDzvKLSAUwzRiTykSXjGJz+Z8HPjDGPCOebyn0NcacTUYv7YFbxBhz3BizxbtdB+zG8w3Q24HnvdGex3PCMMZUGWM+BlotppPT2Fh+Y4xxe3d7en8534uwq/wAIjIcuBV4Jv2a24Od5XcidpVfRAYC1wHPeuO1JGu8QQ14UohIEXA1sBG40Bhz3HvoBHBhkuk4hlTL73UfbAOqgFXGmG5VfuBXwANARzr0Szc2lN8AK0WkREQWpEfL9JFi+UcCp4Dfe11oz4hIv2R1UQOeICJSALwG/IsxpjbwmPH4oyz1JmOlk8vYUX5jTLsxZgqe76jOEJGJ6dA1HaRafhH5HFBljClJn5bpw6b2/2ljzDXAZ4F/FJHr7Nc0PdhQ/h7ANcATxpirgXrgwWT1UQOeAF6f7WvAH40xr3uDT3r9Yz4/WVWS6eQ8dpXfh/fRcQ0w32ZV04JN5Z8N3Ob1A/8J+IyI/CFNKtuKXeffGHPU+18F/BmYkR6N7cWm8lcClQFPna/iMehJoQbcIt6Xb88Cu40xjwUc+gtwj3f7HuDNJNPJaWwsf6GIDPJu9wFuAvbYrrDN2FV+Y8z3jTHDjTFFeD4E/p4x5u40qGwrNp7/ft6XgHhdBzcDpfZrbC82nv8TwBERGecNugFIfgCDSWHFrO70Az6N5/HoE2Cb93cLMBR4FygDVgNDvPEvwnO3rQXOercHREsn2+XLYPknAVu96ZQCP8p22TJZ/pA0XcDSbJctw+d/FLDd+9sJ/CDbZcv0+QemAJu9ab2BZ+RKUnrpMEJFURSHoi4URVEUh6IGXFEUxaGoAVcURXEoasAVRVEcihpwRVEUh6IGXFEUxaGoAVcURXEo/x+kFXPs/AlF6gAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] @@ -77,12 +100,12 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAEICAYAAABGaK+TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAA7mklEQVR4nO29eXhV1bn4/3kTAgGCQQYFxQtOUBUBFRkuIsEJqlZb2+/v1qtWb21pq7XYQaG3VmmvVm9tnadatXod4eJY8CIKHBUHkEBQJhnDDIGEhJwMZFq/P86QM4/7nLN38n6eZz/JWXu9a71r2O9ee+213i3GGBRFURTnkZdrBRRFUZTUUAOuKIriUNSAK4qiOBQ14IqiKA5FDbiiKIpDUQOuKIriUNSAK4qiOBQ14IqiKA5FDbgSFREpF5EGEXEHHMeJSDcRuVdEdnjPbxKR20REQuQvF5HlIlInIpUi8rKIDAo4f4OItAakvU1E/iEiQ0PSuVFENohIrYjsF5F3RaRXHN0ni8gSEakRkfIkyjzEK1fvzfOikPO/FJF9InJYRJ4TkW4B5/5LRL4SkRYRmZVEngNF5FkR2est4wYR+YOI9AyIIyKyVUTWJZHu/ycin3rL4kpUTnEOasCVeHzLGFMUcOwB/he4ELgU6AVcB0wDHvYJicj3gFeAh4B+wBnAEWCpiBwdkP5nxpgioBi4CGgASkVkuDedScCfgKuNMb2A04DZCehdBzwH3JZkeV8FVgF9gd8Bc0Wkv1eXKcBMb9kHAycBfwiQ3QzcDsxPNDMR6QN8BnQHxnvLeDHQGzg5IOr5wDHASSJyboLJV+Gp//sS1UdxGMYYPfSIeADlwEUhYRcCjcAJIeFjgVbgFECA7cDtIXHygDXAH72/bwCWRsh3HjDX+/9vgLfSKMNFQHmCcYfiucn0Cgj7GPip9/9XgD+F1MW+COm8BMxKMM+7ga+AvDjxngNeBt4AHkuyDn4EuHLdn/Sw/tARuJIsFwPLjDE7AwONMcuAXXiM2jDgX/CM1APjtAGve9OIxRvARO//y4Ap3imFCYFTFhngDGCrMaY2IGy1N9x3fnXIuWNFpG8aeV4EvOGtm4iISA/ge3gM+MvA90Wkaxp5Kh0ENeBKPN4SkWrv8Rae6ZC9UeLu9Z7vF/A7WpxY7AH6ABhjPgauAs7GMzVRKSIPiEh+UqVIjCKgJiSsBs80UaTzvv9jzsfHoS/R69PHVXieDBbiqYMC4LI08lQ6CGrAlXh82xjT23t8GzgIDIwSd6D3/MGA39HixOJ4PPO3ABhj/s8Y8y08Rv1KPFMvP0q0AEngBo4KCTsKqI1y3vd/LalTSfT69HE9MMcY02KMacTzFHN9GnkqHQQ14EqyfACMFZETAgNFZCxwArAY+BrPdMr/C4mTB3wXWBQnj+/gmXsOwhjTZoxZ5M1jeKoFiMFaPC8JA0fUI73hvvMjQ87tN8ZUppHnB8B3vHUThnfVzgXAtd7VL/vwTKdcKiLxnmSUDo4acCUpjDEf4DHAr4vIGSKSLyLj8Ly4e9IYs8kYY/C8fLxDRP5dRApFZADwDJ5R64Oh6XrTOVFEHgVK8K7uEJErReT7InK0dyndGGAS8HksPUUkT0QK8Uw3iFeHmPPGxpiNQBlwlzf+d4AReEa8AP8D3Cgip4tIb+AO4PmAPAu8eeYBXbxpxJvqeQBPnbwgIoO96RzvnSYagWeFz0Y87xVGeY+heG6QV8epg3yvPl2APK8+BXH0UZxErt+i6mHfgwirULzhhcB/AzvxLPvbjGd5XV5IvCuBL/As6avCs0TvhIDzN+BZueL2xtkOvACcFhDnfDw3jIN4pi
o2ErK6JYruJYAJOVwJyA0BXN5yfR1afuBXwH7gMPAPoFvAuecj5HlDAnkeh2eVyT5vGTcAdwE9vP/fEkHmdmBFnHRviKDP87nuV3pYd4i3oRVFURSHoVMoiqIoDkUNuOJYRGStBG/z9x3XxJCZGEXGnUE9n4qS51MWpB2xLCIyMb604nR0CkVRFMWhdMlmZr179zannHJKQnHr6uro2bNn/IgWy3Z2eSfrnq686u5MeSfrHib/9deev8OGBcUpLS09aIzpHyaczTemQ4cONYmyZMmShONaKdvZ5Z2se7ryqrsz5Z2se5j8pEmeIwSirDjSOXBFURSHogZcURTFoagBVxRFcShZfYkZiebmZnbt2kVjY2NQeHFxMevXr08pzXRkO7t8tvIuLCxk0KBBFBTozm5FSZWcG/Bdu3bRq1cvhgwZggR8kau2tpZevVLz0pmObGeXz0bexhgqKyvZtWsXJ554Ysp5KUpnJ+dTKI2NjfTt2zfIeCsdGxGhb9++YU9diqIkR84NOKDGuxOiba4o6WMLA65kl/qmFuqbWnKthqIoadLpDXh1dTVPPPFEyvIPPfQQ9fX1FmrkYcWKFfziF7+wPF2AzRVuNldkzPWHoihZQg24DQ14S0sLo0eP5pFHHrE0XUVROhad3oDPnDmTLVu2MGrUKG677TYAHn74Yc4991xGjBjBXXfdBXj8FVx22WWMHDmS4cOHM3v2bB555BH27NnD5MmTmTx5csS0Tz/9dEaMGMFvfvMbAA4cOMB3v/tdzj33XM4991w++eQTAGbNmsV1113HhAkT+PGPf4zL5eLyyy/35/3DH/6QMWPGcNZZZ/H2228DsHbtWsaMGcOoUaMYMWIEmzZtynh9KYpiH3K+jDCQP/xzLev2HAagtbWV/PzUPjweKHv6cUdx17fOiBr3vvvuY82aNZSVlQGwcOFCtmzZwvLlyzHGcMUVV/DRRx9x4MABjjvuOObPnw9ATU0NxcXFPPDAAyxZsoR+/YI/T1hZWcmbb77Jhg0bEBGqq6sBmD59Or/85S8577zz2LFjB1OmTPGvm163bh1Lly6lpaWF0tJSf1r33HMPF1xwAc899xzV1dWMGTOGiy66iKeeeorp06dzzTXX0NTURGtra0r1pSiKM7GVAbcDCxcuZPHixZx11lkAuN1uNm3axMSJE/n1r3/NjBkzuPzyy5k4Mba75eLiYgoLC7nxxhu5/PLL/aPpDz74gHXr1vnjHT58GLfbMx99xRVX0L17d2prgz9yvnDhQt555x3+8pe/AJ6llzt27GD8+PHcc8897Nq1i6uuuopTTz3VsnpQFMX+2MqAB46Uc7UZxRjDr371K6ZPnx52buXKlbz77rvccccdXHjhhdx5551R0+nSpQvLly9n0aJFzJ07l8cee4zFixfT1tbG559/TmFhYZhMNJeUxhhef/11hoW4mDzttNMYO3Ys8+fP59JLL+Vvf/sbF1xwQZIlVhTFqXT6OfBevXoFjXinTJnCiy++6B8V7969m4qKCvbs2UOPHj249tprue2221i5cmVEeR9ut5uamhouvfRSHnzwQVavXg3AJZdcwqOPPuqP55u6icWUKVN49NFHMd6Pb6xatQqArVu3ctJJJ/GLX/yCK6+8ki+//DK1SlAUxZHYagSeC/r27cuECRMYPnw43/zmN7n//vtZtWoV48ePB6CoqIiXXnqJzZs3c9ttt5GXl0dBQQFPPvkkANOmTWPq1Kkcd9xxLFmyxJ9ubW0tV155JY2NjRhjeOCBBwB45JFHuPnmmxkxYgQtLS2cf/75PPVU7C9r/f73v+fWW29lxIgRtLW1ceKJJzJv3jzmzJnDiy++SEFBAQMGDOA///M/M1RLiqLYkU5vwAFeeeWVoN833XQTM2bMCAo7+eSTmTJlSpjsLbfcwi233BIWPnDgQJYvXx4W3q9fP2bPnh0WPmvWrKDfJSUllJSUANC9e3f+9re/hcnMnDmTmTNnhoUrit
I56PRTKIqiKE4lrgEXkedEpEJE1kQ492sRMSLSL5KsoiiKkjkSGYE/D0wNDRSRE4BLgB0W66QoiqIkQFwDboz5CKiKcOpB4HbAWK2UoiiKEh/xLU2LGUlkCDDPGDPc+/tK4AJjzHQRKQdGG2MORpGdBkwD6N+//zlz5swJOl9cXMwpp5wSJmfVTkyVD2dbjWfH5onF4XGyqfvmzZupqakJCnO73RQVFaWcfzryucw7XXkn656uvJN1D5UfdeutAJQ99FBQnMmTJ5caY0aHCUf6VH3oAQwB1nj/7wEsA4q9v8uBfomkM3ToUBPKunXrwsKMMebw4cMRwxMhHdnOIL965yGzeuehnOQdSKS2X7JkSVr5pyOfy7zTlXey7unKO1n3MPlJkzxHCMAKE8GmprIK5WTgRGC1d/Q9CFgpIgNSSKtDkOzd95FHHuG0007jmmuuCQovKyvj3XffTTr/PXv28L3vfS9puVTJpKtbRVESJ+l14MaYr4BjfL/jTaEo4TzxxBN88MEHDBo0KCi8rKyMFStWRPSz0tLSQpcukZvruOOOY+7cuRnRNZIeo0ePZvTo8Kc5RVGySyLLCF8FPgOGicguEbkx82pll5deesnvlvUnP/mJ36tfUVERv/vd7xg5ciTjxo1j//79AGzbto3x48dz5plncscdd0RN94EHHmD48OEMHz6ch7xzWj/96U/ZunUr3/zmN3nwwQf9cZuamrjzzjuZPXs2EyZMYPbs2UEuZq+77jrKy8uZOHEiZ599NmeffTaffvopAOXl5QwfPhyA559/nmuuuYapU6dy6qmncvvtt0fU7aF7Z0V0dXvttdfGdHV73XXXxXR16/PWqK5uFSXzxB2BG2OujnN+iGXa3HoreH2DdG9thRRfpgXJjhoFIS8EAlm/fj2zZ8/mk08+oaCggJtuuonZs2fzk5/8hLq6OsaNG8c999zD7bffzt///nfuuOMOpk+fzs9+9jN+8IMf8Pjjj0dMt7S0lH/84x8sW7YMYwxjx45l0qRJPPXUUyxYsCDMBW3Xrl354x//yIoVK7j33nvp1asXs2bN8ruY7d69O/X19bz//vsUFhayadMmrr76alasWBGW91dffUVZWRndunVj2LBh3HLLLZxwwgn+89WHqli8YD7bNm8Mc3V78803c8kll0R1ddu9e3dcLpc/rVBXt6NHj+Zb3/qWurpVlCzQ6bfSL1q0iNLSUs4991wAGhoaKC4uBjxG1TfSPOecc3j//fcB+OSTT3j99dcBuO6668K23QMsXbqU73znO34Pg1dddRUff/yx301tovhczAI0Nzfz85//nLKyMvLz89m4cWNEmUmTJvnLcPrpp7N9+/YgA17U6yi6desW0dXtmjVryMvzPJhFcnUbSqir2yNHjqirW0XJEvYy4AEj5YY0XMImI2uM4frrr+fee+/1h/m8CxYUFPi/np6fn09LS/uHgLP1VfVAF7MPPvggxx57LKtXr6atrS2iS1rw3Hh8hOoNHle3L/9zERVfl4a5ul20aBH9+/ePqUcgJsTVrc+Vr7q6VZTM0+l9oVx44YXMnTuXiooKAKqqqtixI/bm0gkTJvDaa68B8PLLL0eMM3HiRN566y3q6+upq6vjzTffjPsRiGiuaX3U1NQwcOBA8vLyePHFF1Oelqivc1Nbeziiq9tAp1mpuLr1paWubhUl83R6A3766adz9913c8kllzBixAguvvhi9u3bF1Pm4Ycf5vHHH+fMM89k9+7dEeOcffbZ3HDDDYwZM4axY8fyox/9KO70yeTJk1m3bp3/JWYoN910Ey+88AIjR45kw4YNUUfF8ahzu7nlhu8zYsQIzjvvvCBXt6tWrWLEiBGcfvrpcd3cgsfVbXNzMyNGjOCMM87g7rvvBmDOnDkMHz6cUaNGsWbNGn7wgx+kpKuiKDGItDg8U4du5LGHvG7ksV421/JO1j1deSfrHiafhY08iqIoig1QA96JaG1Tv2OK0pGwhQ
E3CTjUUtLjUH0Ta/fU0Nhsj/XY2uaKkj45N+CFhYVUVlbqBZ1hDjc0A9jCgBtjqKysjLoMUlGUxMj5OvBBgwaxa9cuDhw4EBTe2NiY8gWejmxHla+qa6K+qZXmyq5U1TUBsL42fGNOtnQvLCwM8wWjKEpy5NyAFxQUcOKJJ4aFu1yupHctWiHbUeVvfmUl87/cy6NXn8Ut76wCoPy+y7KSt6IomSHnUyiKoihKaqgBVxRFcShqwBVFURyKGnBFURSHogZcURTFoagBVxRFcShqwBVFURxKIt/EfE5EKkRkTUDY/SKyQUS+FJE3RaR3RrVUFEVRwkhkBP48MDUk7H1guDFmBLAR+K3FeimKoihxiGvAjTEfAVUhYQuNMb7vdH0O6J5oRVGULCOJOJESkSHAPGPM8Ajn/gnMNsa8FEV2GjANoH///ufMmTMnIcXcbjdFRUUJxbVStqPKP1HWyPJ9rfxsZDeeXH0EgOenhn/Rx466Z0tedXemvJN1D5UfdeutAJQFfB8YYPLkyaXGmNFhwpG+8hB6AEOANRHCfwe8ifdGEO+I9EWehL5SkSS2+sKGTeRvernUDJ4xz7xTttsMnjHPDJ4xL2t5O0VedXemvJN1D5NP8os8KTuzEpEbgMuBC70ZKIqiKFkkJQMuIlOB24FJxph6a1VSFEVREiGRZYSvAp8Bw0Rkl4jcCDwG9ALeF5EyEYn/+XJFURTFUuKOwI0xV0cIfjYDuiiKoihJoDsxFUVRHIoacEVRFIeiBlxRFMWhqAFXFEVxKGrAFUVRHIoacEVRFIeiBlxRFMWhqAFXFEVxKGrAFUVRHIoacEVRFIeiBlxRFMWhqAFXFEVxKGrAFUVRHIoacEVRFIeiBlxRFMWhqAFXFEVxKGrAFUVRHEoin1R7TkQqRGRNQFgfEXlfRDZ5/x6dWTUVRVGUUBIZgT8PTA0JmwksMsacCizy/lYURVGySCLfxPxIRIaEBF8JlHj/fwFwATOsVExRkuHDjQfIk1xroSjZRYwx8SN5DPg8Y8xw7+9qY0xv7/8CHPL9jiA7DZgG0L9//3PmzJmTkGJut5uioqKE4lop21HlnyhrZPm+Vn42shtPrj4CwPNTe2Yl72zI37CgDoDHzjOdst84Wfd05Z2se6j8qFtvBaDsoYeC4kyePLnUGDM6TNgYE/cAhgBrAn5Xh5w/lEg6Q4cONYmyZMmShONaKdtR5W96udQMnjHPvFO22wyeMc8MnjEva3lnQ95Xps7ab5yse7ryTtY9TH7SJM8RArDCRLCpqa5C2S8iAwG8fytSTEdRFEVJkVQN+DvA9d7/rwfetkYdRVEUJVESWUb4KvAZMExEdonIjcB9wMUisgm4yPtbURRFySKJrEK5OsqpCy3WRVEURUkC3YmpKIriUNSAK4qiOBQ14IqiKA5FDbiiKIpDUQOuKIriUNSAK4qiOBRHGfClmw5S29icazUUG7OuspWaBu0jSufAMQb8oPsI1z67jJ+/sirXqig25s9fNPLjF1bkWg1FyQqOMeCNza0AbK5w51gTxe5srKjNtQqKkhUcY8AT8HqrKID2FaXz4BgDriiKogTjGAMu+rUVRVGUIBxjwBVFUZRg1IAriqI4FDXgiqIoDkUNuKIoikNRA64oiuJQ0jLgIvJLEVkrImtE5FURKbRKsVB0ba+SKEY7i9JJSNmAi8jxwC+A0caY4UA+8H2rFFMURVFik+4UShegu4h0AXoAe9JXKTJ2XAe+o7JeR3uKouQMSccAich04B6gAVhojLkmQpxpwDSA/v37nzNnzpyE0na73RQVFfl/H2xo4zcfNtC3UPhrSY+kZJMlEfnymlZmfdbI1d/oypQhBVnPP1n5J8oaWb6vlZ+N7MaTq48A8PzUnlnJOxvyNyyo8//fows8cVF42TKVtx3knax7uvJO1j1UftSttwJQ9tBDQXEmT55caowZHSZsjEnpAI4GFgP9gQLgLeDaWDJDhw41ibJkyZKg3z
ur6szgGfPMv967KGnZZElEft7qPWbwjHnmpy+uyEn+ycrf9HKpGTxjnnmnbLcZPGOeGTxjXtbyzoa8r0yDZ8wzZ961IKt520HeybqnK+9k3cPkJ03yHCEAK0wEm5rOFMpFwDZjzAFjTDPwBvCvaaTnKOw4paMoSuciHQO+AxgnIj1ERIALgfXWqOUcdApcUZRckbIBN8YsA+YCK4GvvGk9bZFetsc3ADeoBVcUJTd0SUfYGHMXcJdFusTJKxu5JI5OodgXm3UVRckYuhMzTex2Y1EUpfPgGANuvxGv7RRSFKWT4RgDbld0AK4oSq5QA54ivicCnUJRFCVXqAFPEZ1AURQl16gBVxRFcShqwBVFURxKTg14pfsIQ2bO59XlO+LGnXS/K/MKpYR1k+BDZs5n1jtrLUuv06LvJWzLjc9/wZCZ83OtRochpwZ8R1U9AK99sTNu3NY2e12VkqF1jc9/Wp6RdBXFDizaUJFrFToUOoWSJroKxYboG2alk6AGPEXafaEoiqLkBjXgKWK/naGKonQ21ICnidE5FPuhTaJ0EtSAp4iOwBVFyTVqwNNEB3uKouQKNeApIt7XmDqDoihKrlADnio6haIoSo5RA54mOgBXFCVXpGXARaS3iMwVkQ0isl5ExlulmN2xegCuq1msQ2tS6Syk9U1M4GFggTHmeyLSFehhgU6OwirDq/ZbUZRkSXkELiLFwPnAswDGmCZjTLVFesVl0fr9TLhvMU0tbdnKkoamVsb9aRFLNx0M8oXS1NLGxD8v5oN1+7OmS6o8/dHWqOfmfLGT33/SkEVtOiffenRpVAduK8qrGH33BxxubM6yVqkz7X9W8MamplyrkRF81/wnmw9mL8/mVkb9cSF7quNfi5LqCFJERgFPA+uAkUApMN0YUxcSbxowDaB///7nzJkzx39uS3Ur//V5IycV53Hn+O5B6bvdboqKivy/b1jgSbZvofDXkh782lVPZaPh/vO7079HXkzZZIkmX17TyqzPGhl8VB7fO7WAv5Ye4Yy+efzH8G785sMGv26p5N9mDD98z+Pc67HzjOX6P1HWyPJ9rUFhz0/tGfTbV8eh4enmnQ15n+4Ahfnw1MXJlyFbukerZ7fbzePr8llf1cZtows5o1++5XlnQj6ZfhOr7Knmn8myB17zf/jX7hHjWJn/qFtvZX99G1Ouuperv9GVKUMKAJg8eXKpMWZ0qGw6UyhdgLOBW4wxy0TkYWAm8PvASMaYp/EYeoYNG2ZKSkr854p3HILPP6XXUUdRUjIhKHGXy0VgXBZ4XFAWFhZSUlJC4eeLobGBcePGcUKfHrFlkySa/Fe7auCzpfTqVcTIkd+A0uUcfXQfxo07Ez5c4tctlfxbWtvgvf8DoKioyHL9/3fPSti3NygsLA9vHWei7jIuv6DdRWmXLl1SSiNrukepZ5fLRZ8+hVBVyciRIznv1H7W550J+WT6TYyyp5p/JsseeM2XlEzMfP69e1Pd6hnInXzyyZRMPCmmbDovMXcBu4wxy7y/5+Ix6FkhlzshjdGdmIrSGTA2fyWesgE3xuwDdorIMG/QhXimU7KCXT4qbFUD27ubKNnG7oZDsQfprkK5BXjZuwJlK/Af6auUHLnq6GLxQsJc34gUe2B1v1I6NmkZcGNMGRA2sZ4NcrGVPdK0iRpe+6Fr6hWrseuN1bE7Mf1TKDnO3yr0kVkJRO9BSiI414B7/3aU0VYHKYaSJrkemCjOwrkG3CbLQNTwKoqSKxxrwH3kbAolR/lmg47yVONktA2URHCsAW+fQsmpGtYtI7TR9WonXTobdnmyVJyBYw14+xA4+9bGGDr0EFztt6J4yOVgJpG8bWHA91Q3MOudtbS2JV5bdrGfn2+t8lf07uoGDtWl5tQnl6tQnnBtpnR7VdTzLy/bzqL17Y663lu7j1eW7eDOt9ewr6YxGyp2OqL1hseXxG6rdDnc2Mxv3/iK+qaWpGXfWLmLeV/uyYBWSjTS3chjCQ
dqj/D8p+VceuZAxpzYJyEZ36Nmru6QgetC6wI6+5/f+5opiRUhiFze6f+84Oug3ybkEeN3b64BoPy+ywD4yYul/nPbK+t54YdjMq9kEjj5CSLewOT+9zxt5WsLq3nStYVXl+9gcN8e/HTSyUnJ/mrOagAuH3FcJlTrdCQym2aLEXgq+OfAc5V/QOUGG18nmw8PyZSgTSfMOxS+9tRmDSYXryYcM4WSDnboaME6pNbSNiiGHzvUaacnV22gbR+E3avDVgY8mbtc+4aHXPlCaccKHey0bCyZ8uiqCWvJdb9WnIWtDHgy2Mk3QaDt7Qj2zEb3kg5HvBt1zrtPzhWwF7mojmTydKwB95Gzl5hRLHWqDa42UwkkZzdR7YhB2L06HGvAc+0PPHAkFfgiL9URuI56Owfx2lmnpJRkcKwB95GLucLQHAOXr3cEQ9wRyqCkiN4/HIVjDXiuRyqBNi5wA1LKts9GRtPpL9DsfANKVDWdQrEXdr2v2cqAJ1NJdvGFAsFTKKnqYyejmUwZ7NqxnYrWp5IMaRtwEckXkVUiMs8KhRLPN5u5hecZaOSCXQDYxxCnivNLYF/stFw0InoHsQ2JDOqsGIFPB9ZbkE5K2OF6aGuzYARug3Io9iFn3UH7YRB2v+GmZcBFZBBwGfBMIvFrmw019c2Ap2JeXb4jYrwtB9ysqojtTMc3Gm4zhhc/K6exuTXo/J7qBv652jrHOuv2HObjTQcBWL/3MJsrav3nWqM0cm1jM68s28GnWw7y1a4af3hDUysvfr49qHMEprB0dzOV7iPsrWngnShl2HLAzQfr9vt1+2jjgaDzjc2t/M9n5bS1GWobm5n/5d6Ey5psp91c4ebvH23NamevaWjmtSj9Jx0qDjfydtluS9M8VNfE/67YCUBjS1vMuO2rq+LX5Tur97C3piEpXRZv2B/Ud6Ph+rqCTfvjx1vydUXcOP+7YieV7iMJ6Qfw6eaDrNldExZ+0H2Eq5/+PGmHce9+tZedVfX+30s3HWTtnvb0vzrQwlurdrPUe33HYv3ew/x14deU7axOSodofLTxAM98dYTWNsOHGw9Q39QaXyiAdJ1ZPQTcDvSKFkFEpgHTALoOOIUbnlrE9LML2VDVypwVwZ7sVq1ahbs8nxsW1AFw1jGusPQaGxtxuVy4az0d96l3l/N/25r5ZPVGvv+NrgC43W4uf2gJVY2GnlVfk5fkfIvb7cblCs7bp5OP37+9tl3vstX+//fs3Yu7WzMul4unVjfy+d72Bnl+ak8AXl5/hPe3t3BwxybOOsbTBLVN7RfsM1818fGuxRw6YqioN3Sv/JqCvOAy+PR5fmrPoP99+v/6uUXM39bM7m2bWX0g8s0wtIw+Pv54KT0KwussUvyqqip++tzHbK5uo09dOd1a66OmmwiR6j4Sj65qpHR/K417NzGkOD/oXGtba0o6uN1urnpkMbvchoIDGyPWQTz5SPne/0UDayvbaN2/icU7mv3hoXHdbjeVlZ5rYs2aNXQ9sCFqXh8sXsIvFtbTv7tw/6QeCdfbD0P6SqjuO3d6jOOybVVc/OBHYfFC+Y+Q6wKCy3Wgvo3bPmpg2NF5/HZs94hxAvMHwvqzj599UEdDC1zx0CL+a0L3iLKRuGlBHT0L4PELe0ZM/6+lR6C0LGKeW6s91+/h2lpcLpdf9tHFm4Out1T7vC+9P73yAc+uaeK1XdX0KfT0uy1btuJq2xlTPmUDLiKXAxXGmFIRKYkWzxjzNPA0QLeBp5q2br0oKZmA2VABy78IinvWWWcxekgfWDAfgJKSgGS9YYWFhZSUlHDUmqVwuIbe/QfCth0c1e9YSkpGAp7OUdXoqZiSSSXk5SV3IbpcruC8A/KPxPAzz4TSFQAMGDCAoqJDlJSU8MzmZUD7Xd2X5pv7VsH2PZw09DRKRh0P4BmhLP7AH/dIXiHVRxoBw/nnn0+3LsFGKqiOQurL5XLRq18f2LaTwScPZW393iA9QvUJLduE88
6juHtB5LxC4vfp08c7GmljzNhxbPlyeXjdJUHEuo/Ao+s/BQ5xxsizODegzwDk5+WnpIPL5aK2tQloZvy/TuDonl2Tlo+U772rPgJqGXXOaJa5t8J2zwg/NK7L5aJv3yKo2M8Zw4dTcsaA8Ey85Txv4vmwcAE1TUJJSUnC9Rbx2grQ/bP69bBtqz88bpoRrotAma0H3PDRhxzJK4zYV0Pzj6Vjgzf8cEtw+8Yt+4L51DWH999I/Tk0neIdh+DzTzmqVy9KSs6LGDfhuo+iG8CgE0+BNesA6NatEICTTz6JkvNje4RMZwplAnCFiJQDrwEXiMhLaaSXFIl+1DgbD/WtAU/Fqa9CyQwpvexNURn1TBifuFvpO9hLRN/TbxKu/pUkSNmAG2N+a4wZZIwZAnwfWGyMuTbhBCJ01FQ6r+96iOYbJRvzstHWgefyYkzUP0uk+knOmVVudsVmqmod91V4qyvC4vQy4Zyro93k0iH768Ctusp9H3SI0zGycSFasg48RM7EOJdQet4U4vX1SGnb7SkimyT6ZJcqVm3k8fU5u9sy38DKyurMSZltetew5Is8xhgX4LIirUTxVacdHs2CDHiKZszqjTz+J5Ms9rtcLLmyOkv/l56sTTbx/BOM15aD9k2FTDyd5XoXdiZItX6yPgK3qh0DlxEG/g7LLwtXYjLf8kyUdPX2icdzuxspm2SzzsXXkWJOC6WhSaZ3+FqVrt3XJ/vwLSBwir52wpZf5LG8HeOkl40t6kEv71LNLmwKJb1pGdNuwePEizAHnmIjdYRrNNMfVIiXavuNKXa7+EfgNp9EycSTcjIl7ug3Dlv5QkmG9o4RZw48KyPwgPxSTCOWnBUjymTyTT23jnCx5NYgJjpnbOI8eaaM5VNSvmRz0zesepLNNJHyseVHjWM3ZOK9MddzlYEEb6W3RqOgQX0aLzGTySeV/II+LWeHxrCKjE2hpHF7TLNP5JKc+e3PTbaWYMspFKtI9NEsKyPwKFvikyGWniml6X/JFW8OPL1lhBGyzCpWPyLbZRlhvCejjK1CsThBX/NYWZ/JpKVTKDYl0ZeY2SDaNE404xlx1Gv1KhSfDjH0sArxb9bI3sXin2qwPF0PVpYl6H1GvPwTbKqMrb6yOF1fclYa0lwY5Uybl1TLlHUDXn6wPi1nQburG5izYidflB8C2gu+fFsVry3fgTGGr6va/Y9k5SVmFG+EoY1+pKWVlTsO+X9/UV5F+cE6Nuw7HJbm4YZmmryT674y7q1poPxgsO+JQCdZgSQ6Rxqp37S2GZZtreRISyul2w+FRwhg1c5qmlt8enrCdlbVs+tQfVC81Tur2Xawjn01jWw7GO4/Ix7NrW2sKK9qD4hRrsbmNs8W7gAqDjeyJSTMR1VdE1/v8zhuyvSmpHV7wtvak5+n326u8OhYcbiRg+4jfodS+w83su1gu/7R+nVrm2H5tvZ6+nJXNXVHPL5wAh2+VRxuDHIYVd9sghw8+Vi7p4bDjc1xwwKpqW9mze6aID2srM9ISTW3tlG6vYoV5VW0eK+bmvpm1u+N7pDry13VSeXb0hrZEVmbCa5zgIra6P1t4/7aMOde1fUB9entg+WVdXGvP0vWgSeD+0gL018r447LTktYJtCTGMDtc7/0/+/rGFsO1DHzja9YueNQkJOsbC8jHNK3BxDZqM56Zy2vLt/JyBN6A/DS5zt46XOPR71PZ14QFPdwY7sDKl/q4+9dDED5fZf5z33rsaUR8/KPwCX50cOE+xbTZmDAUYXsOxzscMxnDHwEdjxfXU/885IwPa98/BP69uxKpdeTXOC5RPjrwo089eEW3vn5BEYM6h03/gV//TAojzF/WhQ138se+Zi9NY08P7Wnf2Rv5Qg8cKVItJvX7C92cu/y9rqe9c91/HXhRmqPtFB+32WM9ervo32df3DrPvXhFu5/72te+dFYzhxUzBWPfcIF3ziG5244l/988yt/PF99bLz7m3Ttksf9KxrZtm
gpP5l0UlB6lz2ylFEn9OatmycEhY0cVMzbPz8vYln+7enP2OC9If7l/40ELH6iiZDUnxds4O8fbwPg5sknc9uUbwTpEYkrHvuEL2ddknC+Dy/aFDH8/e0tvPreZ/zjP85l8rBjABj3p0W0mcj97ZIHP6Jvz66U/v5if9hjSzaHxXt1+U5eXb6T9395flSdcjaFUtMQ/Q4eivtIdNeyoR3DNzL3YUW3ifd40xpwemDv7lHjrdntGX3VRih7zFUoaRQi7jrwCGn77kehxhugKYY71HhPO5VJugENxPeUUulOPY1o7K1pL2cmR+Cx0txWGW7YaxPo96Gt6xvB7zvc6G8rn+vTSE9rvnS21XjbNYKOkVynro7y5AcEGc19Xne3ls6BR6jIwDw37neHhUWjpTW2ZoFZfR0lvb1uT93tqW537RtviiuZa+FQfXRbaas58GiP+7HcwYa2ZWhMK+bL4m5rTvCDDilP56SyCiVBGSunmDL5tJPtac9sG/BcppVcvsll7H+JmeEplECSeeJM0lGp7ciZAbeqkkMbM9TWWzICj3M+eBVKAjlGKE+sCyMVIxs4hRIzntNe0mf4gsvES0wfVt4s/fpZvWokzvlkdx37YltanxGSCpxKSuadfbwnVLvv2rfVCDwasVZRhBq+ZD/ekAjxOl/SnTPSKhSr3/77X2Jmrwfm4maQKV8omViFkokReLqtG6pTWxwDnezqF3/6FpY9UttI0P/W9Xm7D3AcYcBjjcBDO1TYCNyCBkh1CsUqXVLbyOPVIcF4VpDJFT9hT1oZyylyfnbDMp8qISWNZ6CTvbH5b15JScVLM5zAay2ZMUuudohaRe4MeBK1HGtUHdqhwuJaYcDjJJLoVvqsummNcSMJipbh1QHWpR35pZ3VF2Ay36RMFiuTbLPoCStUp7jumVMsQ+h1atWOVB9BI/BkDLiz7bdT5sBjGfCQdEPiWnGBx2vkoI8aB/yfnNOdWOdSmQP3XeCx9bCy/2ZjI4+vfTM1M9S+QczCNDOwNNGqtEJTSaqvJ5K+N3qmNkb5yAuaA0+8c8TXqv06siO2mkJJpY5CjVv4KpSU1UmYtihf5Akl1rlYHTydIqSyjDBVHD6YCSIjq1BsmFbo9ZPo+55EDZp/Cjx0pJ9GASKOwAOnUJJKy9m91lYGPBqxRtGh9Z8XUiJLVqEkMSpJdSQda2ST0hx4ojLpv39NPs8UyNCiizAyMVr2kYnt5OmODJMdgRvvdGHC2ZrszIEHamTlCNzu9j1lAy4iJ4jIEhFZJyJrRWR6MvLJ1Esya6szsQol3jRMshd7pPjx3v4ngolwI7F6nWtsI5SNKZTMZhnqY8cKMvGirC2BG1qYcY4UJyQw8RF4Yh3LhP0TXZeEsXQEnnRWtiKdrfQtwK+NMStFpBdQKiLvG2PWJSQdoeaiVVasTtUWfWOgN5vMz4FHcycb1ZlVhLCYI/AEu5Ex4f6X464DT7KL5mwEHpJzpj9kYPsplAQSC5+2iCAUZsBjp+nrp4nWvn8jTybWwAcQOFBJZtCS7Ae87UbKBtwYsxfY6/2/VkTWA8cDCRnwlTuqI6Tpcdjko/xgHY0trTFH1Yfqg7ekHg7Zpl5V10SbgcKCPHoVFiSiWrhecc4Hbu+ubWyhrs1Q29hMc4jzm/3eren1Ta2EEmtL776aRtwBvlFCHeH4aDOGPISmVuPPY9N+N7sONUSMv7u6gYoI2+WjUVHbGFa/gdQ1tXKosb3Muw7V079XtyDdfZQfrKNfr240ePXs2iWP4u4FNLUaGppa6d41H4D6phZa24x/u3tTSxt7axqoCtiKXN8Uecv53poGBhZHd21QXd/E7oDtz8YYvz7VMbYvJ8s+r+6HQrZPu4+0UJDv6duhPmYCieQ/ZY/3OmluNew/3EhLiOU9VN/sd15VVdfEQfeRsDjgMWA1Qf5sIvfD6vomjGk3ni1tJqw8kaj1Or0KzXpvTQN9e3aje9d82toMlQ1tVNc3BcXbWV
VPr8IuiEjQtdTSZthT3cBxAW4rJIEplEN1TRzds2uIfi0hv5spLMinID+PltY2f39vbG4Lu2731TRS3L2AI95r1+ceJLAtt1fWcVRhAT265dMlLy/ILUiowzUfyTyNixUjVBEZAnwEDDfGRHa3BnQbeKoZeP1DUdN5+rpzmPZiaVh4j675EY1esiTqQMnlclFSUuL/XdvYzJmzFiacz9HdhENHkqvXH0880e+MJ1V8jonOnvUuVY12f/gLZkjfHrhum8wpv51Pi2lvqyEz58eUe+VHY/n3Z5ZFPT972jjGntTXn05gHwhN+7kpPfjhe+2O01bfeQnFPRK/6Yf2G4DDjc2MiNF3Rp7Qm7ojLX4fJqkyol8+7/xmKr+cXcabqxL39vnhbSVMut+VcPy7vz2cO95ak7R+eQJb770srM7L77uMJ11b+O8FG5JO8+Hvj+LKUcfjcrl4bWcvFqzdB8B3zjqeB/9tVMS+8z8/HMMPnlseM92Jp/bjxRvH8ts3vuTV5TuT0mn1XZcw8g/h7T38+KM451+O5oXPtseUf+2VmQB8/9/v84fN+cl4xp7Ut9QYMzo0ftreCEWkCHgduDWS8RaRacA0gK4DTomZ1rJVX0UMt8J4g+cCSwS32x0Ut645OWOYrPEGKN+RXEeJxIcffUhBnjjOeAOUV9bjcrlo8aqeaFuVrV4d8/ybH66kYUe7EY6Vbm1tHYGTA++5PuaYHom/JgrtNwBVjbHn+FZHcBSVCl8ebMXlcrFvf+JPVACLl36eVPx/fr4+qfg+2kzkune5XMz/Ijmdfcz9eA3F1Ztwu90cPNiexv79+6O28xsfrYqb7sebDuJyuZi7Inm3xwuXfBwxfM3uw2yriDq2jcmqVdF1TsuAi0gBHuP9sjHmjUhxjDFPA0+DZwQeK71Thg6DNZGNuBWEjo6iETqSqqlvhkWJj8BT4diBx8H2HWmlMXHi+RQW5MOC2KNWu1JSUuLX3V//ccoycuRI+CL6CPzUU0+h5LwTw9I1xsCCd4Pi9izqCbSPwMeOHcvgvj0T1j/SCHx3dQO4FiecRjqUlJTw9v4y2JP4CPycc86Bzz5JOH6//sfAvr0paBfcvoFhz25ZBpUHk05vwIABlJSMxOVycewxR8F+j14DveGR+s6QIUNgc2S3sKF65X3wf/FfsoUwbtw4+GhJxHMFBQXQnPzU3IiRo6KeS2cVigDPAuuNMQ+kmk4gkebo7EA2tts2t6SfRza/iGMX4hU52gutSF0tE93PitVFmSTZl8F26mNBVZviVnonEKvO01kHPgG4DrhARMq8x6VppGfbzp6NPtuc5J0+Eja6tmxDNPfCkd79dMbqS9bY2cmAB634CgjvYPY7Zp2nswplKRbXVeiqDbuQjS7bHMexfCLY6eKyC3khQ3DfUstIY4VMVF9HaxM7jbEC6zZVd7JOIFad22onZrK+hrNFNrbbRvveXjLYtPpySuiSMt9FH8mwZqL67N4myV5zdnpKDtQkeB14x7LgmZpCsZxkHeVki+yMwC14+rBn9eWU0Es5lv3JyOYdm/ZpH1bsIs4VgW2ZqjdCJxCrD9nKgMf7Pl2uyEafbdIplIwQOhrTEXgwyRvwDCmSAtGmUDraLHissZ29DLidekcA2ViFYs0Uij3rL5PEa5vQVSjtBjxCWp1wBJ7soMlWfcxGqmQSx0yhZHp+LdWLKSurUCww4J2kPwcRr21CH6d9XSxbq1Ds3iYpfyLNBgSPwNvDnf6h4lAcM4VixVK6WKTa+XQKxb7EK3H0l5gR0jKxf6eC3dskWf3stNAgyIBjj1UomWhvx6xCsWIzSyxSfUnqmCmUNvs/sscimlfHWLQmcNMPTMuXR6T028K+D2nBTdWeK2P9JL0KxUb9K/BpKtidbPoWvK0ttas+Eze4WGla4swqUeI5s8oGiTi0Ct0Svae6gX+9LzvboRUPR/co4JCFHgHT4dUfj2P8yX3jxou0lX7tnhoue2RphjTLPm
OG9GF5eZVl6Q07thdf76+1LD2nE8mZFcD2/748ojMrW43A7Yp9xhydB7sYb4DXV+5KWdZGA1ZLsHoErsY7PdSAJ4CTpyWU3GKnKQcrsOtejc5KpzPgqax00T7bubH6A7xOxkbvMBU6oQG361pzpWPS0Ubg+jRqLzqdAU/lgtI+27lJZ1laRxsvdLQbktPpdAY8lWU+2mk7N+k1f8fqOzZ1GNpp6XQGPJUplI51CSrZpKONwHUKxV50OgOe2ktM7bSdmbSmUDqYBbfTTkylExrwVJZBaZft3KRz/+5o9k6nE+1F5zPguoxQySLZcMOQTfRasBdpGXARmSoiX4vIZhGZaZVSmSS1R0DttUpqdDSDpxt57EU6X6XPBx4HvgmcDlwtIqdbpVim0BG4kk062pRDRyuP00n5o8bAGGCzMWYrgIi8BlwJrLNCsUxx3bPLKMiPfd+qq6+n58oP/b+PtOjaqc7M6yt38eWu6rjxQvsNQH1Ta4a0yg07qxpyrYISQDpTKMcDOwN+7/KGBSEi00RkhYiswEDXPOiWDwN7Ct8+pYBvn1LAoCLPa/5/6ZVH94BbynFF7a//v9GnXdXT+uTRsyBcoUtPDA7sWQDfPbU9rHc3oV+XIxRLQ8zj2G5tQb+PKTjC+IH59CkUf7oABXlw1jH5AFx7Wld+OLxrmE59CoVu+RHrj2N7BC9vuOCE9sIP7xtZaGBPj8wZfT31MeG4Lpw/KPp9uHsXOO94z/leXeGoru11+b2hBdx6drdgnbq3j7D6FAqn9M5j9LH59C2MvRSjq1fdQT0NYwbkM2ZAu/7d8sPL2qsr/voEGFQk5Ht/DugpnHNsu/xJxXmc1idyV50ypAsj+kWp4AgcFdJE/bsLRQWevlGYH3l0efYx+XH7TKR+UywNDOzWFDHNoUd7yhOrXof3y/fH810jvvh9CoUpQzzt2iPJYZgA4we211n3LnDR8YYrT45wUXnxtZWvXYYence3Tyng34YFV+g3QtrpnGPzGdk/399vfQzsKRzbo73uM0GsDzsUdxP6FgqFCXadE3q1l6s4jr6DimKfHzugvV3TxhiT0gF8D3gm4Pd1wGOxZIYOHWoSZcmSJQnHtVK2s8s7Wfd05VV3Z8o7Wfcw+UmTPEcIwAoTwaamcxvYDZwQ8HuQN0xRFEXJAukY8C+AU0XkRBHpCnwfeMcatRRFUZR4pPwS0xjTIiI/B94D8oHnjDFrLdNMURRFiUk6q1AwxrwLvGuRLoqiKEoSdLqdmIqiKB0FNeCKoigORQ24oiiKQ1EDriiK4lDEZNG3gYjUAl8nGL0YqEkxq3RkrZDvBxzMYf65rDste+pon8++rBXy2Sj7MGNMr7DQSLt7MnUQZTdRlLhPp5FPyrIWySdcTrvpr2V3Ztk7c5/vDGWPloedp1D+mSNZK+TTJZf6a9lzh/b57MtaIZ8uKeef7SmUFcaY0VnLMEd0lnJGQsuuZe9sZKPs0fLI9gj86Sznlys6SzkjoWXvnGjZc5BHVkfgiqIoinXYeQ5cURRFiYEacEVRFIeiBjwBROQEEVkiIutEZK2ITPeG9xGR90Vkk/fv0d7wb4jIZyJyRER+Ey8dO2Nh2QtFZLmIrPam84dclSlRrCp7QHr5IrJKROZluyzJYmXZRaRcRL4SkTIRWZGL8iSDxWXvLSJzRWSDiKwXkfGW6qpz4PERkYHAQGPMShHpBZQC3wZuAKqMMfeJyEzgaGPMDBE5BhjsjXPIGPOXWOkYY2z7HVELyy5AT2OMW0QKgKXAdGPM51kvVIJYVfaA9H4FjAaOMsZcnr2SJI+VZReRcmC0MSadzS5Zw+KyvwB8bIx5RjzfTehhjKm2SlcdgSeAMWavMWal9/9aYD2e739eCbzgjfYCngbEGFNhjPkCaE4wHdtiYdmNMcbt/VngPWw9erCq7AAiMgi4DHgm85qnj5VldxpWlV1EioHzgWe98ZqsNN6gBjxpRGQIcBawDDjWGLPXe2ofcG
yK6TiCdMvunUIoAyqA940xnabswEPA7UBbJvTLJBaU3QALRaRURKZlRsvMkGbZTwQOAP/wTp09IyI9rdRPDXgSiEgR8DpwqzHmcOA545mLSmhEGSsdu2JF2Y0xrcaYUXi+nzpGRIZnQlerSbfsInI5UGGMKc2clpnBoj5/njHmbOCbwM0icr71mlqPBWXvApwNPGmMOQuoA2ZaqaMa8ATxztu+DrxsjHnDG7zfO1/mmzerSDEdW2NV2X14HyOXAFMtVtVyLCr7BOAK71zwa8AFIvJShlS2DKva3Riz2/u3AngTGJMZja3DorLvAnYFPGnOxWPQLUMNeAJ4X8A9C6w3xjwQcOod4Hrv/9cDb6eYjm2xsOz9RaS39//uwMXABssVthCrym6M+a0xZpAxZgiej38vNsZcmwGVLcPCdu/pfRGId/rgEmCN9Rpbh4Xtvg/YKSLDvEEXAtYuWDBpeNHqLAdwHp7HpS+BMu9xKdAXWARsAj4A+njjD8Bz9z0MVHv/PypaOrkuX5bKPgJY5U1nDXBnrsuWrbKHpFkCzMt12bLY7icBq73HWuB3uS5bNtsdGAWs8Kb1Fp6VK5bpqssIFUVRHIpOoSiKojgUNeCKoigORQ24oiiKQ1EDriiK4lDUgCuKojgUNeCKoigORQ24oiiKQ/n/ASf6MR5/snMLAAAAAElFTkSuQmCC\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAEICAYAAABGaK+TAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAA7mklEQVR4nO29eXhV1bn4/3kTAgGCQQYFxQtOUBUBFRkuIsEJqlZb2+/v1qtWb21pq7XYQaG3VmmvVm9tnadatXod4eJY8CIKHBUHkEBQJhnDDIGEhJwMZFq/P86QM4/7nLN38n6eZz/JWXu9a71r2O9ee+213i3GGBRFURTnkZdrBRRFUZTUUAOuKIriUNSAK4qiOBQ14IqiKA5FDbiiKIpDUQOuKIriUNSAK4qiOBQ14IqiKA5FDbgSFREpF5EGEXEHHMeJSDcRuVdEdnjPbxKR20REQuQvF5HlIlInIpUi8rKIDAo4f4OItAakvU1E/iEiQ0PSuVFENohIrYjsF5F3RaRXHN0ni8gSEakRkfIkyjzEK1fvzfOikPO/FJF9InJYRJ4TkW4B5/5LRL4SkRYRmZVEngNF5FkR2est4wYR+YOI9AyIIyKyVUTWJZHu/ycin3rL4kpUTnEOasCVeHzLGFMUcOwB/he4ELgU6AVcB0wDHvYJicj3gFeAh4B+wBnAEWCpiBwdkP5nxpgioBi4CGgASkVkuDedScCfgKuNMb2A04DZCehdBzwH3JZkeV8FVgF9gd8Bc0Wkv1eXKcBMb9kHAycBfwiQ3QzcDsxPNDMR6QN8BnQHxnvLeDHQGzg5IOr5wDHASSJyboLJV+Gp//sS1UdxGMYYPfSIeADlwEUhYRcCjcAJIeFjgVbgFECA7cDtIXHygDXAH72/bwCWRsh3HjDX+/9vgLfSKMNFQHmCcYfiucn0Cgj7GPip9/9XgD+F1MW+COm8BMxKMM+7ga+AvDjxngNeBt4AHkuyDn4EuHLdn/Sw/tARuJIsFwPLjDE7AwONMcuAXXiM2jDgX/CM1APjtAGve9OIxRvARO//y4Ap3imFCYFTFhngDGCrMaY2IGy1N9x3fnXIuWNFpG8aeV4EvOGtm4iISA/ge3gM+MvA90Wkaxp5Kh0ENeBKPN4SkWrv8Rae6ZC9UeLu9Z7vF/A7WpxY7AH6ABhjPgauAs7GMzVRKSIPiEh+UqVIjCKgJiSsBs80UaTzvv9jzsfHoS/R69PHVXieDBbiqYMC4LI08lQ6CGrAlXh82xjT23t8GzgIDIwSd6D3/MGA3
9HixOJ4PPO3ABhj/s8Y8y08Rv1KPFMvP0q0AEngBo4KCTsKqI1y3vd/LalTSfT69HE9MMcY02KMacTzFHN9GnkqHQQ14EqyfACMFZETAgNFZCxwArAY+BrPdMr/C4mTB3wXWBQnj+/gmXsOwhjTZoxZ5M1jeKoFiMFaPC8JA0fUI73hvvMjQ87tN8ZUppHnB8B3vHUThnfVzgXAtd7VL/vwTKdcKiLxnmSUDo4acCUpjDEf4DHAr4vIGSKSLyLj8Ly4e9IYs8kYY/C8fLxDRP5dRApFZADwDJ5R64Oh6XrTOVFEHgVK8K7uEJErReT7InK0dyndGGAS8HksPUUkT0QK8Uw3iFeHmPPGxpiNQBlwlzf+d4AReEa8AP8D3Cgip4tIb+AO4PmAPAu8eeYBXbxpxJvqeQBPnbwgIoO96RzvnSYagWeFz0Y87xVGeY+heG6QV8epg3yvPl2APK8+BXH0UZxErt+i6mHfgwirULzhhcB/AzvxLPvbjGd5XV5IvCuBL/As6avCs0TvhIDzN+BZueL2xtkOvACcFhDnfDw3jIN4pio2ErK6JYruJYAJOVwJyA0BXN5yfR1afuBXwH7gMPAPoFvAuecj5HlDAnkeh2eVyT5vGTcAdwE9vP/fEkHmdmBFnHRviKDP87nuV3pYd4i3oRVFURSHoVMoiqIoDkUNuOJYRGStBG/z9x3XxJCZGEXGnUE9n4qS51MWpB2xLCIyMb604nR0CkVRFMWhdMlmZr179zannHJKQnHr6uro2bNn/IgWy3Z2eSfrnq686u5MeSfrHib/9deev8OGBcUpLS09aIzpHyaczTemQ4cONYmyZMmShONaKdvZ5Z2se7ryqrsz5Z2se5j8pEmeIwSirDjSOXBFURSHogZcURTFoagBVxRFcShZfYkZiebmZnbt2kVjY2NQeHFxMevXr08pzXRkO7t8tvIuLCxk0KBBFBTozm5FSZWcG/Bdu3bRq1cvhgwZggR8kau2tpZevVLz0pmObGeXz0bexhgqKyvZtWsXJ554Ysp5KUpnJ+dTKI2NjfTt2zfIeCsdGxGhb9++YU9diqIkR84NOKDGuxOiba4o6WMLA65kl/qmFuqbWnKthqIoadLpDXh1dTVPPPFEyvIPPfQQ9fX1FmrkYcWKFfziF7+wPF2AzRVuNldkzPWHoihZQg24DQ14S0sLo0eP5pFHHrE0XUVROhad3oDPnDmTLVu2MGrUKG677TYAHn74Yc4991xGjBjBXXfdBXj8FVx22WWMHDmS4cOHM3v2bB555BH27NnD5MmTmTx5csS0Tz/9dEaMGMFvfvMbAA4cOMB3v/tdzj33XM4991w++eQTAGbNmsV1113HhAkT+PGPf4zL5eLyyy/35/3DH/6QMWPGcNZZZ/H2228DsHbtWsaMGcOoUaMYMWIEmzZtynh9KYpiH3K+jDCQP/xzLev2HAagtbWV/PzUPjweKHv6cUdx17fOiBr3vvvuY82aNZSVlQGwcOFCtmzZwvLlyzHGcMUVV/DRRx9x4MABjjvuOObPnw9ATU0NxcXFPPDAAyxZsoR+/YI/T1hZWcmbb77Jhg0bEBGqq6sBmD59Or/85S8577zz2LFjB1OmTPGvm163bh1Lly6lpaWF0tJSf1r33HMPF1xwAc899xzV1dWMGTOGiy66iKeeeorp06dzzTXX0NTURGtra0r1pSiKM7GVAbcDCxcuZPHixZx11lkAuN1uNm3axMSJE/n1r3/NjBkzuPzyy5k4Mba75eLiYgoLC7nxxhu5/PLL/aPpDz74gHXr1vnjHT58GLfbMx99xRVX0L17d2prgz9yvnDhQt555x3+8pe/AJ6llzt27GD8+PHcc8897Nq1i6uuuopTTz3VsnpQFMX+2MqAB46Uc7UZxRjDr371K6ZPnx52buXKlbz77rvccccdXHjhhdx5551R0+nSpQvLly9n0aJFzJ07l8cee4zFixfT1tbG559/TmFhYZhMNJeUx
hhef/11hoW4mDzttNMYO3Ys8+fP59JLL+Vvf/sbF1xwQZIlVhTFqXT6OfBevXoFjXinTJnCiy++6B8V7969m4qKCvbs2UOPHj249tprue2221i5cmVEeR9ut5uamhouvfRSHnzwQVavXg3AJZdcwqOPPuqP55u6icWUKVN49NFHMd6Pb6xatQqArVu3ctJJJ/GLX/yCK6+8ki+//DK1SlAUxZHYagSeC/r27cuECRMYPnw43/zmN7n//vtZtWoV48ePB6CoqIiXXnqJzZs3c9ttt5GXl0dBQQFPPvkkANOmTWPq1Kkcd9xxLFmyxJ9ubW0tV155JY2NjRhjeOCBBwB45JFHuPnmmxkxYgQtLS2cf/75PPVU7C9r/f73v+fWW29lxIgRtLW1ceKJJzJv3jzmzJnDiy++SEFBAQMGDOA///M/M1RLiqLYkU5vwAFeeeWVoN833XQTM2bMCAo7+eSTmTJlSpjsLbfcwi233BIWPnDgQJYvXx4W3q9fP2bPnh0WPmvWrKDfJSUllJSUANC9e3f+9re/hcnMnDmTmTNnhoUritI56PRTKIqiKE4lrgEXkedEpEJE1kQ492sRMSLSL5KsoiiKkjkSGYE/D0wNDRSRE4BLgB0W66QoiqIkQFwDboz5CKiKcOpB4HbAWK2UoiiKEh/xLU2LGUlkCDDPGDPc+/tK4AJjzHQRKQdGG2MORpGdBkwD6N+//zlz5swJOl9cXMwpp5wSJmfVTkyVD2dbjWfH5onF4XGyqfvmzZupqakJCnO73RQVFaWcfzryucw7XXkn656uvJN1D5UfdeutAJQ99FBQnMmTJ5caY0aHCUf6VH3oAQwB1nj/7wEsA4q9v8uBfomkM3ToUBPKunXrwsKMMebw4cMRwxMhHdnOIL965yGzeuehnOQdSKS2X7JkSVr5pyOfy7zTlXey7unKO1n3MPlJkzxHCMAKE8GmprIK5WTgRGC1d/Q9CFgpIgNSSKtDkOzd95FHHuG0007jmmuuCQovKyvj3XffTTr/PXv28L3vfS9puVTJpKtbRVESJ+l14MaYr4BjfL/jTaEo4TzxxBN88MEHDBo0KCi8rKyMFStWRPSz0tLSQpcukZvruOOOY+7cuRnRNZIeo0ePZvTo8Kc5RVGySyLLCF8FPgOGicguEbkx82pll5deesnvlvUnP/mJ36tfUVERv/vd7xg5ciTjxo1j//79AGzbto3x48dz5plncscdd0RN94EHHmD48OEMHz6ch7xzWj/96U/ZunUr3/zmN3nwwQf9cZuamrjzzjuZPXs2EyZMYPbs2UEuZq+77jrKy8uZOHEiZ599NmeffTaffvopAOXl5QwfPhyA559/nmuuuYapU6dy6qmncvvtt0fU7aF7Z0V0dXvttdfGdHV73XXXxXR16/PWqK5uFSXzxB2BG2OujnN+iGXa3HoreH2DdG9thRRfpgXJjhoFIS8EAlm/fj2zZ8/mk08+oaCggJtuuonZs2fzk5/8hLq6OsaNG8c999zD7bffzt///nfuuOMOpk+fzs9+9jN+8IMf8Pjjj0dMt7S0lH/84x8sW7YMYwxjx45l0qRJPPXUUyxYsCDMBW3Xrl354x//yIoVK7j33nvp1asXs2bN8ruY7d69O/X19bz//vsUFhayadMmrr76alasWBGW91dffUVZWRndunVj2LBh3HLLLZxwwgn+89WHqli8YD7bNm8Mc3V78803c8kll0R1ddu9e3dcLpc/rVBXt6NHj+Zb3/qWurpVlCzQ6bfSL1q0iNLSUs4991wAGhoaKC4uBjxG1TfSPOecc3j//fcB+OSTT3j99dcBuO6668K23QMsXbqU73znO34Pg1dddRUff/yx301tovhczAI0Nzfz85//nLKyMvLz89m4cWNEmUmTJvnLcPrpp7N9+/YgA17U6yi6desW0dXtmjVryMvzPJhFcnUbSqir2yNHjqirW0XJEvYy4AEj5YY0XMImI2uM4frrr+fee
+/1h/m8CxYUFPi/np6fn09LS/uHgLP1VfVAF7MPPvggxx57LKtXr6atrS2iS1rw3Hh8hOoNHle3L/9zERVfl4a5ul20aBH9+/ePqUcgJsTVrc+Vr7q6VZTM0+l9oVx44YXMnTuXiooKAKqqqtixI/bm0gkTJvDaa68B8PLLL0eMM3HiRN566y3q6+upq6vjzTffjPsRiGiuaX3U1NQwcOBA8vLyePHFF1Oelqivc1Nbeziiq9tAp1mpuLr1paWubhUl83R6A3766adz9913c8kllzBixAguvvhi9u3bF1Pm4Ycf5vHHH+fMM89k9+7dEeOcffbZ3HDDDYwZM4axY8fyox/9KO70yeTJk1m3bp3/JWYoN910Ey+88AIjR45kw4YNUUfF8ahzu7nlhu8zYsQIzjvvvCBXt6tWrWLEiBGcfvrpcd3cgsfVbXNzMyNGjOCMM87g7rvvBmDOnDkMHz6cUaNGsWbNGn7wgx+kpKuiKDGItDg8U4du5LGHvG7ksV421/JO1j1deSfrHiafhY08iqIoig1QA96JaG1Tv2OK0pGwhQE3CTjUUtLjUH0Ta/fU0Nhsj/XY2uaKkj45N+CFhYVUVlbqBZ1hDjc0A9jCgBtjqKysjLoMUlGUxMj5OvBBgwaxa9cuDhw4EBTe2NiY8gWejmxHla+qa6K+qZXmyq5U1TUBsL42fGNOtnQvLCwM8wWjKEpy5NyAFxQUcOKJJ4aFu1yupHctWiHbUeVvfmUl87/cy6NXn8Ut76wCoPy+y7KSt6IomSHnUyiKoihKaqgBVxRFcShqwBVFURyKGnBFURSHogZcURTFoagBVxRFcShqwBVFURxKIt/EfE5EKkRkTUDY/SKyQUS+FJE3RaR3RrVUFEVRwkhkBP48MDUk7H1guDFmBLAR+K3FeimKoihxiGvAjTEfAVUhYQuNMb7vdH0O6J5oRVGULCOJOJESkSHAPGPM8Ajn/gnMNsa8FEV2GjANoH///ufMmTMnIcXcbjdFRUUJxbVStqPKP1HWyPJ9rfxsZDeeXH0EgOenhn/Rx466Z0tedXemvJN1D5UfdeutAJQFfB8YYPLkyaXGmNFhwpG+8hB6AEOANRHCfwe8ifdGEO+I9EWehL5SkSS2+sKGTeRvernUDJ4xz7xTttsMnjHPDJ4xL2t5O0VedXemvJN1D5NP8os8KTuzEpEbgMuBC70ZKIqiKFkkJQMuIlOB24FJxph6a1VSFEVREiGRZYSvAp8Bw0Rkl4jcCDwG9ALeF5EyEYn/+XJFURTFUuKOwI0xV0cIfjYDuiiKoihJoDsxFUVRHIoacEVRFIeiBlxRFMWhqAFXFEVxKGrAFUVRHIoacEVRFIeiBlxRFMWhqAFXFEVxKGrAFUVRHIoacEVRFIeiBlxRFMWhqAFXFEVxKGrAFUVRHIoacEVRFIeiBlxRFMWhqAFXFEVxKGrAFUVRHEoin1R7TkQqRGRNQFgfEXlfRDZ5/x6dWTUVRVGUUBIZgT8PTA0JmwksMsacCizy/lYURVGySCLfxPxIRIaEBF8JlHj/fwFwATOsVExRkuHDjQfIk1xroSjZRYwx8SN5DPg8Y8xw7+9qY0xv7/8CHPL9jiA7DZgG0L9//3PmzJmTkGJut5uioqKE4lop21HlnyhrZPm+Vn42shtPrj4CwPNTe2Yl72zI37CgDoDHzjOdst84Wfd05Z2se6j8qFtvBaDsoYeC4kyePLnUGDM6TNgYE/cAhgBrAn5Xh5w/lEg6Q4cONYmyZMmShONaKdtR5W96udQMnjHPvFO22wyeMc8MnjEva3lnQ95Xps7ab5yse7ryTtY9TH7SJM8RArDCRLCpqa5C2S8iAwG8fytSTEdRFEVJkVQN+DvA9d7/rwfetkYdRVEUJVESWUb4KvAZMExEdonIjcB9wMUisgm4yPtbURRFySKJrEK5OsqpCy3WRVEURUkC3YmpKIriUNSAK4qiOBQ14IqiKA5FDbiiKIpDUQOuK
IriUNSAK4qiOBRHGfClmw5S29icazUUG7OuspWaBu0jSufAMQb8oPsI1z67jJ+/sirXqig25s9fNPLjF1bkWg1FyQqOMeCNza0AbK5w51gTxe5srKjNtQqKkhUcY8AT8HqrKID2FaXz4BgDriiKogTjGAMu+rUVRVGUIBxjwBVFUZRg1IAriqI4FDXgiqIoDkUNuKIoikNRA64oiuJQ0jLgIvJLEVkrImtE5FURKbRKsVB0ba+SKEY7i9JJSNmAi8jxwC+A0caY4UA+8H2rFFMURVFik+4UShegu4h0AXoAe9JXKTJ2XAe+o7JeR3uKouQMSccAich04B6gAVhojLkmQpxpwDSA/v37nzNnzpyE0na73RQVFfl/H2xo4zcfNtC3UPhrSY+kZJMlEfnymlZmfdbI1d/oypQhBVnPP1n5J8oaWb6vlZ+N7MaTq48A8PzUnlnJOxvyNyyo8//fows8cVF42TKVtx3knax7uvJO1j1UftSttwJQ9tBDQXEmT55caowZHSZsjEnpAI4GFgP9gQLgLeDaWDJDhw41ibJkyZKg3zur6szgGfPMv967KGnZZElEft7qPWbwjHnmpy+uyEn+ycrf9HKpGTxjnnmnbLcZPGOeGTxjXtbyzoa8r0yDZ8wzZ961IKt520HeybqnK+9k3cPkJ03yHCEAK0wEm5rOFMpFwDZjzAFjTDPwBvCvaaTnKOw4paMoSuciHQO+AxgnIj1ERIALgfXWqOUcdApcUZRckbIBN8YsA+YCK4GvvGk9bZFetsc3ADeoBVcUJTd0SUfYGHMXcJdFusTJKxu5JI5OodgXm3UVRckYuhMzTex2Y1EUpfPgGANuvxGv7RRSFKWT4RgDbld0AK4oSq5QA54ivicCnUJRFCVXqAFPEZ1AURQl16gBVxRFcShqwBVFURxKTg14pfsIQ2bO59XlO+LGnXS/K/MKpYR1k+BDZs5n1jtrLUuv06LvJWzLjc9/wZCZ83OtRochpwZ8R1U9AK99sTNu3NY2e12VkqF1jc9/Wp6RdBXFDizaUJFrFToUOoWSJroKxYboG2alk6AGPEXafaEoiqLkBjXgKWK/naGKonQ21ICnidE5FPuhTaJ0EtSAp4iOwBVFyTVqwNNEB3uKouQKNeApIt7XmDqDoihKrlADnio6haIoSo5RA54mOgBXFCVXpGXARaS3iMwVkQ0isl5ExlulmN2xegCuq1msQ2tS6Syk9U1M4GFggTHmeyLSFehhgU6OwirDq/ZbUZRkSXkELiLFwPnAswDGmCZjTLVFesVl0fr9TLhvMU0tbdnKkoamVsb9aRFLNx0M8oXS1NLGxD8v5oN1+7OmS6o8/dHWqOfmfLGT33/SkEVtOiffenRpVAduK8qrGH33BxxubM6yVqkz7X9W8MamplyrkRF81/wnmw9mL8/mVkb9cSF7quNfi5LqCFJERgFPA+uAkUApMN0YUxcSbxowDaB///7nzJkzx39uS3Ur//V5IycV53Hn+O5B6bvdboqKivy/b1jgSbZvofDXkh782lVPZaPh/vO7079HXkzZZIkmX17TyqzPGhl8VB7fO7WAv5Ye4Yy+efzH8G785sMGv26p5N9mDD98z+Pc67HzjOX6P1HWyPJ9rUFhz0/tGfTbV8eh4enmnQ15n+4Ahfnw1MXJlyFbukerZ7fbzePr8llf1cZtows5o1++5XlnQj6ZfhOr7Knmn8myB17zf/jX7hHjWJn/qFtvZX99G1Ouuperv9GVKUMKAJg8eXKpMWZ0qGw6UyhdgLOBW4wxy0TkYWAm8PvASMaYp/EYeoYNG2ZKSkr854p3HILPP6XXUUdRUjIhKHGXy0VgXBZ4XFAWFhZSUlJC4eeLobGBcePGcUKfHrFlkySa/Fe7auCzpfTqVcTIkd+A0uUcfXQfxo07Ez5c4tctlfxbWtvgvf8DoKioyHL9/3fPSti3NygsLA9vHWei7jIuv6DdRWmXLl1SSiNrukepZ5fLR
Z8+hVBVyciRIznv1H7W550J+WT6TYyyp5p/JsseeM2XlEzMfP69e1Pd6hnInXzyyZRMPCmmbDovMXcBu4wxy7y/5+Ix6FkhlzshjdGdmIrSGTA2fyWesgE3xuwDdorIMG/QhXimU7KCXT4qbFUD27ubKNnG7oZDsQfprkK5BXjZuwJlK/Af6auUHLnq6GLxQsJc34gUe2B1v1I6NmkZcGNMGRA2sZ4NcrGVPdK0iRpe+6Fr6hWrseuN1bE7Mf1TKDnO3yr0kVkJRO9BSiI414B7/3aU0VYHKYaSJrkemCjOwrkG3CbLQNTwKoqSKxxrwH3kbAolR/lmg47yVONktA2URHCsAW+fQsmpGtYtI7TR9WonXTobdnmyVJyBYw14+xA4+9bGGDr0EFztt6J4yOVgJpG8bWHA91Q3MOudtbS2JV5bdrGfn2+t8lf07uoGDtWl5tQnl6tQnnBtpnR7VdTzLy/bzqL17Y663lu7j1eW7eDOt9ewr6YxGyp2OqL1hseXxG6rdDnc2Mxv3/iK+qaWpGXfWLmLeV/uyYBWSjTS3chjCQdqj/D8p+VceuZAxpzYJyEZ36Nmru6QgetC6wI6+5/f+5opiRUhiFze6f+84Oug3ybkEeN3b64BoPy+ywD4yYul/nPbK+t54YdjMq9kEjj5CSLewOT+9zxt5WsLq3nStYVXl+9gcN8e/HTSyUnJ/mrOagAuH3FcJlTrdCQym2aLEXgq+OfAc5V/QOUGG18nmw8PyZSgTSfMOxS+9tRmDSYXryYcM4WSDnboaME6pNbSNiiGHzvUaacnV22gbR+E3avDVgY8mbtc+4aHXPlCaccKHey0bCyZ8uiqCWvJdb9WnIWtDHgy2Mk3QaDt7Qj2zEb3kg5HvBt1zrtPzhWwF7mojmTydKwB95Gzl5hRLHWqDa42UwkkZzdR7YhB2L06HGvAc+0PPHAkFfgiL9URuI56Owfx2lmnpJRkcKwB95GLucLQHAOXr3cEQ9wRyqCkiN4/HIVjDXiuRyqBNi5wA1LKts9GRtPpL9DsfANKVDWdQrEXdr2v2cqAJ1NJdvGFAsFTKKnqYyejmUwZ7NqxnYrWp5IMaRtwEckXkVUiMs8KhRLPN5u5hecZaOSCXQDYxxCnivNLYF/stFw0InoHsQ2JDOqsGIFPB9ZbkE5K2OF6aGuzYARug3Io9iFn3UH7YRB2v+GmZcBFZBBwGfBMIvFrmw019c2Ap2JeXb4jYrwtB9ysqojtTMc3Gm4zhhc/K6exuTXo/J7qBv652jrHOuv2HObjTQcBWL/3MJsrav3nWqM0cm1jM68s28GnWw7y1a4af3hDUysvfr49qHMEprB0dzOV7iPsrWngnShl2HLAzQfr9vt1+2jjgaDzjc2t/M9n5bS1GWobm5n/5d6Ey5psp91c4ebvH23NamevaWjmtSj9Jx0qDjfydtluS9M8VNfE/67YCUBjS1vMuO2rq+LX5Tur97C3piEpXRZv2B/Ud6Ph+rqCTfvjx1vydUXcOP+7YieV7iMJ6Qfw6eaDrNldExZ+0H2Eq5/+PGmHce9+tZedVfX+30s3HWTtnvb0vzrQwlurdrPUe33HYv3ew/x14deU7axOSodofLTxAM98dYTWNsOHGw9Q39QaXyiAdJ1ZPQTcDvSKFkFEpgHTALoOOIUbnlrE9LML2VDVypwVwZ7sVq1ahbs8nxsW1AFw1jGusPQaGxtxuVy4az0d96l3l/N/25r5ZPVGvv+NrgC43W4uf2gJVY2GnlVfk5fkfIvb7cblCs7bp5OP37+9tl3vstX+//fs3Yu7WzMul4unVjfy+d72Bnl+ak8AXl5/hPe3t3BwxybOOsbTBLVN7RfsM1818fGuxRw6YqioN3Sv/JqCvOAy+PR5fmrPoP99+v/6uUXM39bM7m2bWX0g8s0wtIw+Pv54KT0KwussUvyqqip++tzHbK5uo09dOd1a66OmmwiR6j4Sj65qpHR/K417NzGkOD/oX
Gtba0o6uN1urnpkMbvchoIDGyPWQTz5SPne/0UDayvbaN2/icU7mv3hoXHdbjeVlZ5rYs2aNXQ9sCFqXh8sXsIvFtbTv7tw/6QeCdfbD0P6SqjuO3d6jOOybVVc/OBHYfFC+Y+Q6wKCy3Wgvo3bPmpg2NF5/HZs94hxAvMHwvqzj599UEdDC1zx0CL+a0L3iLKRuGlBHT0L4PELe0ZM/6+lR6C0LGKeW6s91+/h2lpcLpdf9tHFm4Out1T7vC+9P73yAc+uaeK1XdX0KfT0uy1btuJq2xlTPmUDLiKXAxXGmFIRKYkWzxjzNPA0QLeBp5q2br0oKZmA2VABy78IinvWWWcxekgfWDAfgJKSgGS9YYWFhZSUlHDUmqVwuIbe/QfCth0c1e9YSkpGAp7OUdXoqZiSSSXk5SV3IbpcruC8A/KPxPAzz4TSFQAMGDCAoqJDlJSU8MzmZUD7Xd2X5pv7VsH2PZw09DRKRh0P4BmhLP7AH/dIXiHVRxoBw/nnn0+3LsFGKqiOQurL5XLRq18f2LaTwScPZW393iA9QvUJLduE886juHtB5LxC4vfp08c7GmljzNhxbPlyeXjdJUHEuo/Ao+s/BQ5xxsizODegzwDk5+WnpIPL5aK2tQloZvy/TuDonl2Tlo+U772rPgJqGXXOaJa5t8J2zwg/NK7L5aJv3yKo2M8Zw4dTcsaA8Ey85Txv4vmwcAE1TUJJSUnC9Rbx2grQ/bP69bBtqz88bpoRrotAma0H3PDRhxzJK4zYV0Pzj6Vjgzf8cEtw+8Yt+4L51DWH999I/Tk0neIdh+DzTzmqVy9KSs6LGDfhuo+iG8CgE0+BNesA6NatEICTTz6JkvNje4RMZwplAnCFiJQDrwEXiMhLaaSXFIl+1DgbD/WtAU/Fqa9CyQwpvexNURn1TBifuFvpO9hLRN/TbxKu/pUkSNmAG2N+a4wZZIwZAnwfWGyMuTbhBCJ01FQ6r+96iOYbJRvzstHWgefyYkzUP0uk+knOmVVudsVmqmod91V4qyvC4vQy4Zyro93k0iH768Ctusp9H3SI0zGycSFasg48RM7EOJdQet4U4vX1SGnb7SkimyT6ZJcqVm3k8fU5u9sy38DKyurMSZltetew5Is8xhgX4LIirUTxVacdHs2CDHiKZszqjTz+J5Ms9rtcLLmyOkv/l56sTTbx/BOM15aD9k2FTDyd5XoXdiZItX6yPgK3qh0DlxEG/g7LLwtXYjLf8kyUdPX2icdzuxspm2SzzsXXkWJOC6WhSaZ3+FqVrt3XJ/vwLSBwir52wpZf5LG8HeOkl40t6kEv71LNLmwKJb1pGdNuwePEizAHnmIjdYRrNNMfVIiXavuNKXa7+EfgNp9EycSTcjIl7ug3Dlv5QkmG9o4RZw48KyPwgPxSTCOWnBUjymTyTT23jnCx5NYgJjpnbOI8eaaM5VNSvmRz0zesepLNNJHyseVHjWM3ZOK9MddzlYEEb6W3RqOgQX0aLzGTySeV/II+LWeHxrCKjE2hpHF7TLNP5JKc+e3PTbaWYMspFKtI9NEsKyPwKFvikyGWniml6X/JFW8OPL1lhBGyzCpWPyLbZRlhvCejjK1CsThBX/NYWZ/JpKVTKDYl0ZeY2SDaNE404xlx1Gv1KhSfDjH0sArxb9bI3sXin2qwPF0PVpYl6H1GvPwTbKqMrb6yOF1fclYa0lwY5Uybl1TLlHUDXn6wPi1nQburG5izYidflB8C2gu+fFsVry3fgTGGr6va/Y9k5SVmFG+EoY1+pKWVlTsO+X9/UV5F+cE6Nuw7HJbm4YZmmryT674y7q1poPxgsO+JQCdZgSQ6Rxqp37S2GZZtreRISyul2w+FRwhg1c5qmlt8enrCdlbVs+tQfVC81Tur2Xawjn01jWw7GO4/Ix7NrW2sKK9qD4hRrsbmNs8W7gAqDjeyJSTMR1VdE1/v8zhuyvSmpHV7wtvak5+n326u8OhYcbiRg+4jfodS+
w83su1gu/7R+nVrm2H5tvZ6+nJXNXVHPL5wAh2+VRxuDHIYVd9sghw8+Vi7p4bDjc1xwwKpqW9mze6aID2srM9ISTW3tlG6vYoV5VW0eK+bmvpm1u+N7pDry13VSeXb0hrZEVmbCa5zgIra6P1t4/7aMOde1fUB9entg+WVdXGvP0vWgSeD+0gL018r447LTktYJtCTGMDtc7/0/+/rGFsO1DHzja9YueNQkJOsbC8jHNK3BxDZqM56Zy2vLt/JyBN6A/DS5zt46XOPR71PZ14QFPdwY7sDKl/q4+9dDED5fZf5z33rsaUR8/KPwCX50cOE+xbTZmDAUYXsOxzscMxnDHwEdjxfXU/885IwPa98/BP69uxKpdeTXOC5RPjrwo089eEW3vn5BEYM6h03/gV//TAojzF/WhQ138se+Zi9NY08P7Wnf2Rv5Qg8cKVItJvX7C92cu/y9rqe9c91/HXhRmqPtFB+32WM9ervo32df3DrPvXhFu5/72te+dFYzhxUzBWPfcIF3ziG5244l/988yt/PF99bLz7m3Ttksf9KxrZtmgpP5l0UlB6lz2ylFEn9OatmycEhY0cVMzbPz8vYln+7enP2OC9If7l/40ELH6iiZDUnxds4O8fbwPg5sknc9uUbwTpEYkrHvuEL2ddknC+Dy/aFDH8/e0tvPreZ/zjP85l8rBjABj3p0W0mcj97ZIHP6Jvz66U/v5if9hjSzaHxXt1+U5eXb6T9395flSdcjaFUtMQ/Q4eivtIdNeyoR3DNzL3YUW3ifd40xpwemDv7lHjrdntGX3VRih7zFUoaRQi7jrwCGn77kehxhugKYY71HhPO5VJugENxPeUUulOPY1o7K1pL2cmR+Cx0txWGW7YaxPo96Gt6xvB7zvc6G8rn+vTSE9rvnS21XjbNYKOkVynro7y5AcEGc19Xne3ls6BR6jIwDw37neHhUWjpTW2ZoFZfR0lvb1uT93tqW537RtviiuZa+FQfXRbaas58GiP+7HcwYa2ZWhMK+bL4m5rTvCDDilP56SyCiVBGSunmDL5tJPtac9sG/BcppVcvsll7H+JmeEplECSeeJM0lGp7ciZAbeqkkMbM9TWWzICj3M+eBVKAjlGKE+sCyMVIxs4hRIzntNe0mf4gsvES0wfVt4s/fpZvWokzvlkdx37YltanxGSCpxKSuadfbwnVLvv2rfVCDwasVZRhBq+ZD/ekAjxOl/SnTPSKhSr3/77X2Jmrwfm4maQKV8omViFkokReLqtG6pTWxwDnezqF3/6FpY9UttI0P/W9Xm7D3AcYcBjjcBDO1TYCNyCBkh1CsUqXVLbyOPVIcF4VpDJFT9hT1oZyylyfnbDMp8qISWNZ6CTvbH5b15JScVLM5zAay2ZMUuudohaRe4MeBK1HGtUHdqhwuJaYcDjJJLoVvqsummNcSMJipbh1QHWpR35pZ3VF2Ay36RMFiuTbLPoCStUp7jumVMsQ+h1atWOVB9BI/BkDLiz7bdT5sBjGfCQdEPiWnGBx2vkoI8aB/yfnNOdWOdSmQP3XeCx9bCy/2ZjI4+vfTM1M9S+QczCNDOwNNGqtEJTSaqvJ5K+N3qmNkb5yAuaA0+8c8TXqv06siO2mkJJpY5CjVv4KpSU1UmYtihf5Akl1rlYHTydIqSyjDBVHD6YCSIjq1BsmFbo9ZPo+55EDZp/Cjx0pJ9GASKOwAOnUJJKy9m91lYGPBqxRtGh9Z8XUiJLVqEkMSpJdSQda2ST0hx4ojLpv39NPs8UyNCiizAyMVr2kYnt5OmODJMdgRvvdGHC2ZrszIEHamTlCNzu9j1lAy4iJ4jIEhFZJyJrRWR6MvLJ1Esya6szsQol3jRMshd7pPjx3v4ngolwI7F6nWtsI5SNKZTMZhnqY8cKMvGirC2BG1qYcY4UJyQw8RF4Yh3LhP0TXZeEsXQEnnRWtiKdrfQtwK+NMStFpBdQKiLvG2PWJSQdoeaiVVasTtUWfWOgN
5vMz4FHcycb1ZlVhLCYI/AEu5Ex4f6X464DT7KL5mwEHpJzpj9kYPsplAQSC5+2iCAUZsBjp+nrp4nWvn8jTybWwAcQOFBJZtCS7Ae87UbKBtwYsxfY6/2/VkTWA8cDCRnwlTuqI6Tpcdjko/xgHY0trTFH1Yfqg7ekHg7Zpl5V10SbgcKCPHoVFiSiWrhecc4Hbu+ubWyhrs1Q29hMc4jzm/3eren1Ta2EEmtL776aRtwBvlFCHeH4aDOGPISmVuPPY9N+N7sONUSMv7u6gYoI2+WjUVHbGFa/gdQ1tXKosb3Muw7V079XtyDdfZQfrKNfr240ePXs2iWP4u4FNLUaGppa6d41H4D6phZa24x/u3tTSxt7axqoCtiKXN8Uecv53poGBhZHd21QXd/E7oDtz8YYvz7VMbYvJ8s+r+6HQrZPu4+0UJDv6duhPmYCieQ/ZY/3OmluNew/3EhLiOU9VN/sd15VVdfEQfeRsDjgMWA1Qf5sIvfD6vomjGk3ni1tJqw8kaj1Or0KzXpvTQN9e3aje9d82toMlQ1tVNc3BcXbWVVPr8IuiEjQtdTSZthT3cBxAW4rJIEplEN1TRzds2uIfi0hv5spLMinID+PltY2f39vbG4Lu2731TRS3L2AI95r1+ceJLAtt1fWcVRhAT265dMlLy/ILUiowzUfyTyNixUjVBEZAnwEDDfGRHa3BnQbeKoZeP1DUdN5+rpzmPZiaVh4j675EY1esiTqQMnlclFSUuL/XdvYzJmzFiacz9HdhENHkqvXH0880e+MJ1V8jonOnvUuVY12f/gLZkjfHrhum8wpv51Pi2lvqyEz58eUe+VHY/n3Z5ZFPT972jjGntTXn05gHwhN+7kpPfjhe+2O01bfeQnFPRK/6Yf2G4DDjc2MiNF3Rp7Qm7ojLX4fJqkyol8+7/xmKr+cXcabqxL39vnhbSVMut+VcPy7vz2cO95ak7R+eQJb770srM7L77uMJ11b+O8FG5JO8+Hvj+LKUcfjcrl4bWcvFqzdB8B3zjqeB/9tVMS+8z8/HMMPnlseM92Jp/bjxRvH8ts3vuTV5TuT0mn1XZcw8g/h7T38+KM451+O5oXPtseUf+2VmQB8/9/v84fN+cl4xp7Ut9QYMzo0ftreCEWkCHgduDWS8RaRacA0gK4DTomZ1rJVX0UMt8J4g+cCSwS32x0Ut645OWOYrPEGKN+RXEeJxIcffUhBnjjOeAOUV9bjcrlo8aqeaFuVrV4d8/ybH66kYUe7EY6Vbm1tHYGTA++5PuaYHom/JgrtNwBVjbHn+FZHcBSVCl8ebMXlcrFvf+JPVACLl36eVPx/fr4+qfg+2kzkune5XMz/Ijmdfcz9eA3F1Ztwu90cPNiexv79+6O28xsfrYqb7sebDuJyuZi7Inm3xwuXfBwxfM3uw2yriDq2jcmqVdF1TsuAi0gBHuP9sjHmjUhxjDFPA0+DZwQeK71Thg6DNZGNuBWEjo6iETqSqqlvhkWJj8BT4diBx8H2HWmlMXHi+RQW5MOC2KNWu1JSUuLX3V//ccoycuRI+CL6CPzUU0+h5LwTw9I1xsCCd4Pi9izqCbSPwMeOHcvgvj0T1j/SCHx3dQO4FiecRjqUlJTw9v4y2JP4CPycc86Bzz5JOH6//sfAvr0paBfcvoFhz25ZBpUHk05vwIABlJSMxOVycewxR8F+j14DveGR+s6QIUNgc2S3sKF65X3wf/FfsoUwbtw4+GhJxHMFBQXQnPzU3IiRo6KeS2cVigDPAuuNMQ+kmk4gkebo7EA2tts2t6SfRza/iGMX4hU52gutSF0tE93PitVFmSTZl8F26mNBVZviVnonEKvO01kHPgG4DrhARMq8x6VppGfbzp6NPtuc5J0+Eja6tmxDNPfCkd79dMbqS9bY2cmAB634CgjvYPY7Zp2nswplKRbXVeiqDbuQjS7bHMexfCLY6eKyC3khQ3DfUstIY4VMVF9HaxM7jbEC6zZVd
7JOIFad22onZrK+hrNFNrbbRvveXjLYtPpySuiSMt9FH8mwZqL67N4myV5zdnpKDtQkeB14x7LgmZpCsZxkHeVki+yMwC14+rBn9eWU0Es5lv3JyOYdm/ZpH1bsIs4VgW2ZqjdCJxCrD9nKgMf7Pl2uyEafbdIplIwQOhrTEXgwyRvwDCmSAtGmUDraLHissZ29DLidekcA2ViFYs0Uij3rL5PEa5vQVSjtBjxCWp1wBJ7soMlWfcxGqmQSx0yhZHp+LdWLKSurUCww4J2kPwcRr21CH6d9XSxbq1Ds3iYpfyLNBgSPwNvDnf6h4lAcM4VixVK6WKTa+XQKxb7EK3H0l5gR0jKxf6eC3dskWf3stNAgyIBjj1UomWhvx6xCsWIzSyxSfUnqmCmUNvs/sscimlfHWLQmcNMPTMuXR6T028K+D2nBTdWeK2P9JL0KxUb9K/BpKtidbPoWvK0ttas+Eze4WGla4swqUeI5s8oGiTi0Ct0Svae6gX+9LzvboRUPR/co4JCFHgHT4dUfj2P8yX3jxou0lX7tnhoue2RphjTLPmOG9GF5eZVl6Q07thdf76+1LD2nE8mZFcD2/748ojMrW43A7Yp9xhydB7sYb4DXV+5KWdZGA1ZLsHoErsY7PdSAJ4CTpyWU3GKnKQcrsOtejc5KpzPgqax00T7bubH6A7xOxkbvMBU6oQG361pzpWPS0Ubg+jRqLzqdAU/lgtI+27lJZ1laRxsvdLQbktPpdAY8lWU+2mk7N+k1f8fqOzZ1GNpp6XQGPJUplI51CSrZpKONwHUKxV50OgOe2ktM7bSdmbSmUDqYBbfTTkylExrwVJZBaZft3KRz/+5o9k6nE+1F5zPguoxQySLZcMOQTfRasBdpGXARmSoiX4vIZhGZaZVSmSS1R0DttUpqdDSDpxt57EU6X6XPBx4HvgmcDlwtIqdbpVim0BG4kk062pRDRyuP00n5o8bAGGCzMWYrgIi8BlwJrLNCsUxx3bPLKMiPfd+qq6+n58oP/b+PtOjaqc7M6yt38eWu6rjxQvsNQH1Ta4a0yg07qxpyrYISQDpTKMcDOwN+7/KGBSEi00RkhYiswEDXPOiWDwN7Ct8+pYBvn1LAoCLPa/5/6ZVH94BbynFF7a//v9GnXdXT+uTRsyBcoUtPDA7sWQDfPbU9rHc3oV+XIxRLQ8zj2G5tQb+PKTjC+IH59CkUf7oABXlw1jH5AFx7Wld+OLxrmE59CoVu+RHrj2N7BC9vuOCE9sIP7xtZaGBPj8wZfT31MeG4Lpw/KPp9uHsXOO94z/leXeGoru11+b2hBdx6drdgnbq3j7D6FAqn9M5j9LH59C2MvRSjq1fdQT0NYwbkM2ZAu/7d8sPL2qsr/voEGFQk5Ht/DugpnHNsu/xJxXmc1idyV50ypAsj+kWp4AgcFdJE/bsLRQWevlGYH3l0efYx+XH7TKR+UywNDOzWFDHNoUd7yhOrXof3y/fH810jvvh9CoUpQzzt2iPJYZgA4we211n3LnDR8YYrT45wUXnxtZWvXYYence3Tyng34YFV+g3QtrpnGPzGdk/399vfQzsKRzbo73uM0GsDzsUdxP6FgqFCXadE3q1l6s4jr6DimKfHzugvV3TxhiT0gF8D3gm4Pd1wGOxZIYOHWoSZcmSJQnHtVK2s8s7Wfd05VV3Z8o7Wfcw+UmTPEcIwAoTwaamcxvYDZwQ8HuQN0xRFEXJAukY8C+AU0XkRBHpCnwfeMcatRRFUZR4pPwS0xjTIiI/B94D8oHnjDFrLdNMURRFiUk6q1AwxrwLvGuRLoqiKEoSdLqdmIqiKB0FNeCKoigORQ24oiiKQ1EDriiK4lDEZNG3gYjUAl8nGL0YqEkxq3RkrZDvBxzMYf65rDste+pon8++rBXy2Sj7MGNMr7DQSLt7MnUQZTdRlLhPp5FPyrIWySdcTrvpr2V3Ztk7c5/vDGWPloedp1D+mSNZK+TTJZf6a9lzh/b57
MtaIZ8uKeef7SmUFcaY0VnLMEd0lnJGQsuuZe9sZKPs0fLI9gj86Sznlys6SzkjoWXvnGjZc5BHVkfgiqIoinXYeQ5cURRFiYEacEVRFIeiBjwBROQEEVkiIutEZK2ITPeG9xGR90Vkk/fv0d7wb4jIZyJyRER+Ey8dO2Nh2QtFZLmIrPam84dclSlRrCp7QHr5IrJKROZluyzJYmXZRaRcRL4SkTIRWZGL8iSDxWXvLSJzRWSDiKwXkfGW6qpz4PERkYHAQGPMShHpBZQC3wZuAKqMMfeJyEzgaGPMDBE5BhjsjXPIGPOXWOkYY2z7HVELyy5AT2OMW0QKgKXAdGPM51kvVIJYVfaA9H4FjAaOMsZcnr2SJI+VZReRcmC0MSadzS5Zw+KyvwB8bIx5RjzfTehhjKm2SlcdgSeAMWavMWal9/9aYD2e739eCbzgjfYCngbEGFNhjPkCaE4wHdtiYdmNMcbt/VngPWw9erCq7AAiMgi4DHgm85qnj5VldxpWlV1EioHzgWe98ZqsNN6gBjxpRGQIcBawDDjWGLPXe2ofcGyK6TiCdMvunUIoAyqA940xnabswEPA7UBbJvTLJBaU3QALRaRURKZlRsvMkGbZTwQOAP/wTp09IyI9rdRPDXgSiEgR8DpwqzHmcOA545mLSmhEGSsdu2JF2Y0xrcaYUXi+nzpGRIZnQlerSbfsInI5UGGMKc2clpnBoj5/njHmbOCbwM0icr71mlqPBWXvApwNPGmMOQuoA2ZaqaMa8ATxztu+DrxsjHnDG7zfO1/mmzerSDEdW2NV2X14HyOXAFMtVtVyLCr7BOAK71zwa8AFIvJShlS2DKva3Riz2/u3AngTGJMZja3DorLvAnYFPGnOxWPQLUMNeAJ4X8A9C6w3xjwQcOod4Hrv/9cDb6eYjm2xsOz9RaS39//uwMXABssVthCrym6M+a0xZpAxZgiej38vNsZcmwGVLcPCdu/pfRGId/rgEmCN9Rpbh4Xtvg/YKSLDvEEXAtYuWDBpeNHqLAdwHp7HpS+BMu9xKdAXWARsAj4A+njjD8Bz9z0MVHv/PypaOrkuX5bKPgJY5U1nDXBnrsuWrbKHpFkCzMt12bLY7icBq73HWuB3uS5bNtsdGAWs8Kb1Fp6VK5bpqssIFUVRHIpOoSiKojgUNeCKoigORQ24oiiKQ1EDriiK4lDUgCuKojgUNeCKoigORQ24oiiKQ/n/ASf6MR5/snMLAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] @@ -106,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -125,18 +148,7 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "from pts.model.deepar import DeepAREstimator\n", - "from pts.modules import ZeroInflatedNegativeBinomialOutput\n", - "from pts import Trainer" - ] - }, - { - "cell_type": "code", - "execution_count": 10, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -156,75 +168,25 @@ " freq=dataset.metadata.freq,\n", " scaling=True,\n", " trainer=Trainer(device=device,\n", - " epochs=50,\n", + " epochs=1,\n", " learning_rate=1e-3,\n", " num_batches_per_epoch=120,\n", " batch_size=256,\n", " num_workers=8,\n", - " pin_memory=True,\n", " )\n", ")" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "119it [00:29, 4.09it/s, avg_epoch_loss=1.17, epoch=0]\n", - "119it [00:31, 3.81it/s, avg_epoch_loss=1.14, epoch=1]\n", - "119it [00:27, 4.29it/s, avg_epoch_loss=1.12, epoch=2]\n", - "119it [00:29, 4.06it/s, avg_epoch_loss=1.11, epoch=3]\n", - "119it [00:28, 4.17it/s, avg_epoch_loss=1.1, epoch=4]\n", - "119it [00:28, 4.14it/s, avg_epoch_loss=1.1, epoch=5]\n", - "119it [00:30, 3.93it/s, avg_epoch_loss=1.1, epoch=6] \n", - "119it [00:27, 4.27it/s, avg_epoch_loss=1.11, epoch=7]\n", - "119it [00:28, 4.16it/s, avg_epoch_loss=1.09, epoch=8]\n", - "119it [00:29, 4.09it/s, avg_epoch_loss=1.11, epoch=9]\n", - "119it [00:27, 4.40it/s, avg_epoch_loss=1.1, epoch=10]\n", - "119it [00:28, 4.23it/s, avg_epoch_loss=1.1, epoch=11]\n", - "119it [00:28, 4.12it/s, avg_epoch_loss=1.1, epoch=12] \n", - "119it [00:29, 4.06it/s, avg_epoch_loss=1.1, epoch=13]\n", - "119it [00:29, 4.10it/s, avg_epoch_loss=1.11, epoch=14]\n", - "119it [00:28, 4.24it/s, avg_epoch_loss=1.1, epoch=15] \n", - "119it [00:30, 3.95it/s, avg_epoch_loss=1.1, 
epoch=16]\n", - "119it [00:27, 4.28it/s, avg_epoch_loss=1.09, epoch=17]\n", - "119it [00:27, 4.26it/s, avg_epoch_loss=1.1, epoch=18]\n", - "119it [00:29, 4.07it/s, avg_epoch_loss=1.1, epoch=19] \n", - "119it [00:29, 3.98it/s, avg_epoch_loss=1.09, epoch=20]\n", - "119it [00:27, 4.33it/s, avg_epoch_loss=1.1, epoch=21]\n", - "119it [00:29, 4.08it/s, avg_epoch_loss=1.09, epoch=22]\n", - "119it [00:29, 4.09it/s, avg_epoch_loss=1.09, epoch=23]\n", - "119it [00:28, 4.22it/s, avg_epoch_loss=1.09, epoch=24]\n", - "119it [00:27, 4.26it/s, avg_epoch_loss=1.09, epoch=25]\n", - "119it [00:31, 3.81it/s, avg_epoch_loss=1.1, epoch=26]\n", - "119it [00:31, 3.73it/s, avg_epoch_loss=1.09, epoch=27]\n", - "119it [00:27, 4.32it/s, avg_epoch_loss=1.08, epoch=28]\n", - "119it [00:28, 4.14it/s, avg_epoch_loss=1.09, epoch=29]\n", - "119it [00:30, 3.87it/s, avg_epoch_loss=1.08, epoch=30]\n", - "119it [00:28, 4.19it/s, avg_epoch_loss=1.09, epoch=31]\n", - "119it [00:28, 4.17it/s, avg_epoch_loss=1.08, epoch=32]\n", - "119it [00:29, 4.09it/s, avg_epoch_loss=1.1, epoch=33] \n", - "119it [00:27, 4.39it/s, avg_epoch_loss=1.09, epoch=34]\n", - "119it [00:28, 4.21it/s, avg_epoch_loss=1.09, epoch=35]\n", - "119it [00:28, 4.16it/s, avg_epoch_loss=1.09, epoch=36]\n", - "119it [00:27, 4.31it/s, avg_epoch_loss=1.08, epoch=37]\n", - "119it [00:29, 4.07it/s, avg_epoch_loss=1.09, epoch=38]\n", - "119it [00:28, 4.19it/s, avg_epoch_loss=1.09, epoch=39]\n", - "119it [00:29, 4.06it/s, avg_epoch_loss=1.09, epoch=40]\n", - "119it [00:28, 4.14it/s, avg_epoch_loss=1.08, epoch=41]\n", - "119it [00:28, 4.16it/s, avg_epoch_loss=1.09, epoch=42]\n", - "119it [00:27, 4.25it/s, avg_epoch_loss=1.09, epoch=43]\n", - "119it [00:27, 4.26it/s, avg_epoch_loss=1.1, epoch=44]\n", - "119it [00:26, 4.41it/s, avg_epoch_loss=1.09, epoch=45]\n", - "119it [00:27, 4.25it/s, avg_epoch_loss=1.08, epoch=46]\n", - "119it [00:28, 4.20it/s, avg_epoch_loss=1.09, epoch=47]\n", - "119it [00:30, 3.92it/s, avg_epoch_loss=1.09, epoch=48]\n", - 
"119it [00:30, 3.96it/s, avg_epoch_loss=1.09, epoch=49]\n" + "119it [02:44, 1.39s/it, avg_epoch_loss=1.18, epoch=0]\n" ] } ], @@ -234,16 +196,7 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "from pts.evaluation import make_evaluation_predictions, Evaluator" - ] - }, - { - "cell_type": "code", - "execution_count": 13, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -256,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -266,17 +219,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running evaluation: 100%|██████████| 30490/30490 [00:01<00:00, 23152.64it/s]\n" - ] - } - ], + "outputs": [], "source": [ "evaluator = Evaluator()\n", "agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(dataset.test))" @@ -284,60 +229,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"MSE\": 4.439620313754265,\n", - " \"abs_error\": 807089.0,\n", - " \"abs_target_sum\": 1231764.0,\n", - " \"abs_target_mean\": 1.4428196598416343,\n", - " \"seasonal_error\": 1.1272178349378457,\n", - " \"MASE\": 0.8789472000957106,\n", - " \"MAPE\": 0.30587227335898637,\n", - " \"sMAPE\": 0.6816909686747539,\n", - " \"OWA\": NaN,\n", - " \"MSIS\": 7.28829495943688,\n", - " \"QuantileLoss[0.1]\": 228315.8,\n", - " \"Coverage[0.1]\": 0.0042929766199690765,\n", - " \"QuantileLoss[0.2]\": 422650.8,\n", - " \"Coverage[0.2]\": 0.01732535257461463,\n", - " \"QuantileLoss[0.3]\": 586642.4,\n", - " \"Coverage[0.3]\": 0.042479970013587595,\n", - " \"QuantileLoss[0.4]\": 716891.6,\n", - " \"Coverage[0.4]\": 0.08317012603663966,\n", - " \"QuantileLoss[0.5]\": 
807089.0,\n", - " \"Coverage[0.5]\": 0.14288174108607035,\n", - " \"QuantileLoss[0.6]\": 854345.2,\n", - " \"Coverage[0.6]\": 0.2176439582064377,\n", - " \"QuantileLoss[0.7]\": 842037.0,\n", - " \"Coverage[0.7]\": 0.3306306517359322,\n", - " \"QuantileLoss[0.8]\": 755156.7999999999,\n", - " \"Coverage[0.8]\": 0.48669352949444783,\n", - " \"QuantileLoss[0.9]\": 547328.7999999999,\n", - " \"Coverage[0.9]\": 0.7026999484608537,\n", - " \"RMSE\": 2.1070406530853325,\n", - " \"NRMSE\": 1.4603631429007586,\n", - " \"ND\": 0.655230222672525,\n", - " \"wQuantileLoss[0.1]\": 0.185356772888313,\n", - " \"wQuantileLoss[0.2]\": 0.34312644305240286,\n", - " \"wQuantileLoss[0.3]\": 0.47626201122942385,\n", - " \"wQuantileLoss[0.4]\": 0.5820040202506324,\n", - " \"wQuantileLoss[0.5]\": 0.655230222672525,\n", - " \"wQuantileLoss[0.6]\": 0.6935948769407126,\n", - " \"wQuantileLoss[0.7]\": 0.6836025407464417,\n", - " \"wQuantileLoss[0.8]\": 0.6130693866682253,\n", - " \"wQuantileLoss[0.9]\": 0.44434550774336634,\n", - " \"mean_wQuantileLoss\": 0.5196213091324493,\n", - " \"MAE_Coverage\": 0.2746868606412719\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "print(json.dumps(agg_metrics, indent=4))" ] @@ -390,7 +284,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.6" } }, "nbformat": 4, diff --git a/pts/__init__.py b/pts/__init__.py index 0a48be3..ce92987 100644 --- a/pts/__init__.py +++ b/pts/__init__.py @@ -2,7 +2,6 @@ from pkgutil import extend_path from pkg_resources import get_distribution, DistributionNotFound -from .exception import assert_pts from .trainer import Trainer __path__ = extend_path(__path__, __name__) # type: ignore @@ -10,4 +9,4 @@ __path__ = extend_path(__path__, __name__) # type: ignore try: __version__ = get_distribution(__name__).version except DistributionNotFound: - __version__ = "0.0.0-unknown" \ No newline at end of file + __version__ = "0.0.0-unknown" diff --git 
a/pts/core/__init__.py b/pts/core/__init__.py deleted file mode 100644 index 9687f13..0000000 --- a/pts/core/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Relative imports -from ._base import fqname_for - -__all__ = ["fqname_for"] - -# fix Sphinx issues, see https://bit.ly/2K2eptM -for item in __all__: - if hasattr(item, "__module__"): - setattr(item, "__module__", __name__) \ No newline at end of file diff --git a/pts/core/_base.py b/pts/core/_base.py deleted file mode 100644 index 93ed653..0000000 --- a/pts/core/_base.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - - -def fqname_for(cls: type) -> str: - """ - Returns the fully qualified name of ``cls``. - - Parameters - ---------- - cls - The class we are interested in. - - Returns - ------- - str - The fully qualified name of ``cls``. - """ - return f"{cls.__module__}.{cls.__qualname__}" \ No newline at end of file diff --git a/pts/core/component.py b/pts/core/component.py deleted file mode 100644 index 5dc2611..0000000 --- a/pts/core/component.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. 
This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - - -import functools -import inspect -from collections import OrderedDict -from typing import Any - -import torch -from pydantic import BaseConfig, BaseModel, create_model - -from pts.core.serde import dump_code - - -class BaseValidatedInitializerModel(BaseModel): - """ - Base Pydantic model for components with :func:`validated` initializers. - - See Also - -------- - validated - Decorates an initializer methods with argument validation logic. - """ - - class Config(BaseConfig): - """ - `Config `_ for the - Pydantic model inherited by all :func:`validated` initializers. - - Allows the use of arbitrary type annotations in initializer parameters. - """ - - arbitrary_types_allowed = True - - -def validated(base_model=None): - """ - Decorates an ``__init__`` method with typed parameters with validation - and auto-conversion logic. - - >>> class ComplexNumber: - ... @validated() - ... def __init__(self, x: float = 0.0, y: float = 0.0) -> None: - ... self.x = x - ... self.y = y - - Classes with decorated initializers can be instantiated using arguments of - another type (e.g. an ``y`` argument of type ``str`` ). The decorator - handles the type conversion logic. - - >>> c = ComplexNumber(y='42') - >>> (c.x, c.y) - (0.0, 42.0) - - If the bound argument cannot be converted, the decorator throws an error. - - >>> c = ComplexNumber(y=None) - Traceback (most recent call last): - ... - pydantic.error_wrappers.ValidationError: 1 validation error for ComplexNumberModel - y - none is not an allowed value (type=type_error.none.not_allowed) - - Internally, the decorator delegates all validation and conversion logic to - `a Pydantic model `_, which can be - accessed through the ``Model`` attribute of the decorated initiazlier. 
- - >>> ComplexNumber.__init__.Model - - - The Pydantic model is synthesized automatically from on the parameter - names and types of the decorated initializer. In the ``ComplexNumber`` - example, the synthesized Pydantic model corresponds to the following - definition. - - >>> class ComplexNumberModel(BaseValidatedInitializerModel): - ... x: float = 0.0 - ... y: float = 0.0 - - - Clients can optionally customize the base class of the synthesized - Pydantic model using the ``base_model`` decorator parameter. The default - behavior uses :class:`BaseValidatedInitializerModel` and its - `model config `_. - - See Also - -------- - BaseValidatedInitializerModel - Default base class for all synthesized Pydantic models. - """ - - def validator(init): - init_qualname = dict(inspect.getmembers(init))["__qualname__"] - init_clsnme = init_qualname.split(".")[0] - init_params = inspect.signature(init).parameters - init_fields = { - param.name: ( - param.annotation - if param.annotation != inspect.Parameter.empty - else Any, - param.default if param.default != inspect.Parameter.empty else ..., - ) - for param in init_params.values() - if param.name != "self" - and param.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD - } - - if base_model is None: - PydanticModel = create_model( - f"{init_clsnme}Model", - __config__=BaseValidatedInitializerModel.Config, - **init_fields, - ) - else: - PydanticModel = create_model( - f"{init_clsnme}Model", __base__=base_model, **init_fields, - ) - - def validated_repr(self) -> str: - return dump_code(self) - - def validated_getnewargs_ex(self): - return (), self.__init_args__ - - @functools.wraps(init) - def init_wrapper(*args, **kwargs): - self, *args = args - - nmargs = { - name: arg - for (name, param), arg in zip(list(init_params.items()), [self] + args) - if name != "self" - } - model = PydanticModel(**{**nmargs, **kwargs}) - - # merge nmargs, kwargs, and the model fields into a single dict - all_args = {**nmargs, **kwargs, **model.__dict__} 
- - # save the merged dictionary for Representable use, but only of the - # __init_args__ is not already set in order to avoid overriding a - # value set by a subclass initializer in super().__init__ calls - if not getattr(self, "__init_args__", {}): - self.__init_args__ = OrderedDict( - { - name: arg - for name, arg in sorted(all_args.items()) - if type(arg) != torch.nn.ParameterDict - } - ) - self.__class__.__getnewargs_ex__ = validated_getnewargs_ex - self.__class__.__repr__ = validated_repr - - return init(self, **all_args) - - # attach the Pydantic model as the attribute of the initializer wrapper - setattr(init_wrapper, "Model", PydanticModel) - - return init_wrapper - - return validator diff --git a/pts/core/serde.py b/pts/core/serde.py deleted file mode 100644 index 18531ca..0000000 --- a/pts/core/serde.py +++ /dev/null @@ -1,374 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - - -import itertools -import json -import math -import textwrap -from functools import singledispatch -from pydoc import locate -from typing import Any, Optional, cast, NamedTuple - -import numpy as np - -from pts.core import fqname_for - -bad_type_msg = textwrap.dedent( - """ - Cannot serialize type {}. 
See the documentation of the `encode` and - `validate` functions at - - http://gluon-ts.mxnet.io/api/gluonts/gluonts.html - - and the Python documentation of the `__getnewargs_ex__` magic method at - - https://docs.python.org/3/library/pickle.html#object.__getnewargs_ex__ - - for more information how to make this type serializable. - """ -).lstrip() - - -def dump_code(o: Any) -> str: - """ - Serializes an object to a Python code string. - - Parameters - ---------- - o - The object to serialize. - - Returns - ------- - str - A string representing the object as Python code. - - See Also - -------- - load_code - Inverse function. - """ - - def _dump_code(x: Any) -> str: - # r = { 'class': ..., 'args': ... } - # r = { 'class': ..., 'kwargs': ... } - if type(x) == dict and x.get("__kind__") == kind_inst: - args = x.get("args", []) - kwargs = x.get("kwargs", {}) - - fqname = x["class"] - bindings = ", ".join( - itertools.chain( - map(_dump_code, args), - [f"{k}={_dump_code(v)}" for k, v in kwargs.items()], - ) - ) - return f"{fqname}({bindings})" - - if type(x) == dict and x.get("__kind__") == kind_type: - return x["class"] - - if isinstance(x, dict): - inner = ", ".join( - f"{_dump_code(k)}: {_dump_code(v)}" for k, v in x.items() - ) - return f"{{{inner}}}" - - if isinstance(x, list): - inner = ", ".join(list(map(dump_code, x))) - return f"[{inner}]" - - if isinstance(x, tuple): - inner = ", ".join(list(map(dump_code, x))) - # account for the extra `,` in `(x,)` - if len(x) == 1: - inner += "," - return f"({inner})" - - if isinstance(x, str): - # json.dumps escapes the string - return json.dumps(x) - - if isinstance(x, float) or np.issubdtype(type(x), np.inexact): - if math.isfinite(x): - return str(x) - else: - # e.g. 
`nan` needs to be encoded as `float("nan")` - return 'float("{x}")' - - if isinstance(x, int) or np.issubdtype(type(x), np.integer): - return str(x) - - if x is None: - return str(x) - - raise RuntimeError( - f"Unexpected element type {fqname_for(x.__class__)}" - ) - - return _dump_code(encode(o)) - -# JSON Serialization/Deserialization -# ---------------------------------- - -# The canonical way to do this is to define and `default` and `object_hook` -# parameters to the json.dumps and json.loads methods. Unfortunately, due -# to https://bugs.python.org/issue12657 this is not possible at the moment, -# as support for custom NamedTuple serialization is broken. -# -# To circumvent the issue, we pass the input value through custom encode -# and decode functions that map nested object terms to JSON-serializable -# data structures with explicit recursion. - - - -def dump_json(o: Any, indent: Optional[int] = None) -> str: - """ - Serializes an object to a JSON string. - Parameters - ---------- - o - The object to serialize. - indent - An optional number of spaced to use as an indent. - Returns - ------- - str - A string representing the object in JSON format. - See Also - -------- - load_json - Inverse function. - """ - return json.dumps(encode(o), indent=indent, sort_keys=True) - - -def load_json(s: str) -> Any: - """ - Deserializes an object from a JSON string. - Parameters - ---------- - s - A string representing the object in JSON format. - Returns - ------- - Any - The deserialized object. - See Also - -------- - dump_json - Inverse function. - """ - return decode(json.loads(s)) - - -# Structural encoding/decoding -# ---------------------------- - -kind_type = "type" -kind_inst = "instance" - - -@singledispatch -def encode(v: Any) -> Any: - """ - Transforms a value `v` as a serializable intermediate representation (for - example, named tuples are encoded as dictionaries). 
The intermediate - representation is then recursively traversed and serialized either as - Python code or as JSON string. - - This function is decorated with :func:`~functools.singledispatch` and can - be specialized by clients for families of types that are not supported by - the basic implementation (explained below). - - Examples - -------- - - The conversion logic implemented by the basic implementation is used - as a fallback and is best explained by a series of examples. - - Lists (as lists). - - >>> encode([1, 2.0, '3']) - [1, 2.0, '3'] - - Tuples (as lists). - - >>> encode((1, 2.0, '3')) - [1, 2.0, '3'] - - Dictionaries (as dictionaries). - - >>> encode({'a': 1, 'b': 2.0, 'c': '3'}) - {'a': 1, 'b': 2.0, 'c': '3'} - - Named tuples (as dictionaries with a ``'__kind__': 'instance'`` member). - - >>> from pprint import pprint - >>> from typing import NamedTuple - >>> class ComplexNumber(NamedTuple): - ... x: float = 0.0 - ... y: float = 0.0 - >>> pprint(encode(ComplexNumber(4.0, 2.0))) - {'__kind__': 'instance', - 'class': 'gluonts.core.serde.ComplexNumber', - 'kwargs': {'x': 4.0, 'y': 2.0}} - - Classes with a :func:`~gluonts.core.component.validated` initializer (as - dictionaries with a ``'__kind__': 'instance'`` member). - - >>> from gluonts.core.component import validated - >>> class ComplexNumber: - ... @validated() - ... def __init__(self, x: float = 0.0, y: float = 0.0) -> None: - ... self.x = x - ... self.y = y - >>> pprint(encode(ComplexNumber(4.0, 2.0))) - {'__kind__': 'instance', - 'args': [], - 'class': 'gluonts.core.serde.ComplexNumber', - 'kwargs': {'x': 4.0, 'y': 2.0}} - - Classes with a ``__getnewargs_ex__`` magic method (as dictionaries with a - ``'__kind__': 'instance'`` member). - - >>> from gluonts.core.component import validated - >>> class ComplexNumber: - ... def __init__(self, x: float = 0.0, y: float = 0.0) -> None: - ... self.x = x - ... self.y = y - ... def __getnewargs_ex__(self): - ... 
return [], {'x': self.x, 'y': self.y} - >>> pprint(encode(ComplexNumber(4.0, 2.0))) - {'__kind__': 'instance', - 'args': [], - 'class': 'gluonts.core.serde.ComplexNumber', - 'kwargs': {'x': 4.0, 'y': 2.0}} - - - Types (as dictionaries with a ``'__kind__': 'type' member``). - - >>> encode(ComplexNumber) - {'__kind__': 'type', 'class': 'gluonts.core.serde.ComplexNumber'} - - Parameters - ---------- - v - The value to be encoded. - - Returns - ------- - Any - An encoding of ``v`` that can be serialized to Python code or - JSON string. - - See Also - -------- - decode - Inverse function. - dump_json - Serializes an object to a JSON string. - dump_code - Serializes an object to a Python code string. - """ - if isinstance(v, type(None)): - return None - - if isinstance(v, (float, int, str)): - return v - - if np.issubdtype(type(v), np.inexact): - return float(v) - - if np.issubdtype(type(v), np.integer): - return int(v) - - # we have to check for namedtuples first, to encode them not as plain - # tuples (which would become lists) - if isinstance(v, tuple) and hasattr(v, "_asdict"): - v = cast(NamedTuple, v) - return { - "__kind__": kind_inst, - "class": fqname_for(v.__class__), - "kwargs": encode(v._asdict()), - } - - if isinstance(v, (list, set, tuple)): - return list(map(encode, v)) - - if isinstance(v, dict): - return {k: encode(v) for k, v in v.items()} - - if isinstance(v, type): - return {"__kind__": kind_type, "class": fqname_for(v)} - - if hasattr(v, "__getnewargs_ex__"): - args, kwargs = v.__getnewargs_ex__() # mypy: ignore - return { - "__kind__": kind_inst, - "class": fqname_for(v.__class__), - "args": encode(args), - "kwargs": encode(kwargs), - } - - raise RuntimeError(bad_type_msg.format(fqname_for(v.__class__))) - - -def decode(r: Any) -> Any: - """ - Decodes a value from an intermediate representation `r`. - - Parameters - ---------- - r - An intermediate representation to be decoded. 
- - Returns - ------- - Any - A Python data structure corresponding to the decoded version of ``r``. - - See Also - -------- - encode - Inverse function. - """ - - # structural recursion over the possible shapes of r - # r = { 'class': ..., 'args': ... } - # r = { 'class': ..., 'kwargs': ... } - if type(r) == dict and r.get("__kind__") == kind_inst: - cls = cast(Any, locate(r["class"])) - args = decode(r["args"]) if "args" in r else [] - kwargs = decode(r["kwargs"]) if "kwargs" in r else {} - return cls(*args, **kwargs) - # r = { 'class': ..., 'args': ... } - # r = { 'class': ..., 'kwargs': ... } - if type(r) == dict and r.get("__kind__") == kind_type: - return locate(r["class"]) - # r = { k1: v1, ..., kn: vn } - elif type(r) == dict: - return {k: decode(v) for k, v in r.items()} - # r = ( y1, ..., yn ) - elif type(r) == tuple: - return tuple([decode(y) for y in r]) - # r = [ y1, ..., yn ] - elif type(r) == list: - return [decode(y) for y in r] - # r = { y1, ..., yn } - elif type(r) == set: - return {decode(y) for y in r} - # r = a - else: - return r diff --git a/pts/dataset/__init__.py b/pts/dataset/__init__.py deleted file mode 100644 index 699a841..0000000 --- a/pts/dataset/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -from .artificial import ( - ArtificialDataset, - ConstantDataset, - ComplexSeasonalTimeSeries, - RecipeDataset, - constant_dataset, - default_synthetic, - generate_sf2, -) -from .common import ( - DataEntry, - FieldName, - Dataset, - MetaData, - TrainDatasets, - DateConstants, -) -from .file_dataset import FileDataset -from .list_dataset import ListDataset -from .loader import TrainDataLoader, InferenceDataLoader -from .multivariate_grouper import MultivariateGrouper -from .process import ProcessStartField, ProcessDataEntry -from .stat import DatasetStatistics, ScaleHistogram, calculate_dataset_statistics -from .transformed_iterable_dataset import TransformedIterableDataset -from .utils import ( - to_pandas, - load_datasets, - save_datasets, - 
serialize_data_entry, - frequency_add, - forecast_start, -) diff --git a/pts/dataset/artificial.py b/pts/dataset/artificial.py deleted file mode 100644 index 5d016d8..0000000 --- a/pts/dataset/artificial.py +++ /dev/null @@ -1,834 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - - -import math -import os -import random -from typing import Callable, List, NamedTuple, Optional, Tuple, Union - -import numpy as np -import pandas as pd -import rapidjson as json - -from .common import ( - MetaData, - CategoricalFeatureInfo, - BasicFeatureInfo, - FieldName, - Dataset, - TrainDatasets, - DataEntry, -) -from .list_dataset import ListDataset -from .recipe import ( - BinaryHolidays, - BinaryMarkovChain, - Constant, - ForEachCat, - Lag, - LinearTrend, - RandomCat, - RandomGaussian, - Stack, - generate, - take_as_list, -) -from .stat import DatasetStatistics, calculate_dataset_statistics - - -class DatasetInfo(NamedTuple): - """ - Information stored on a dataset. When downloading from the repository, the - dataset repository checks that the obtained version matches the one - declared in dataset_info/dataset_name.json. - """ - - name: str - metadata: MetaData - prediction_length: int - train_statistics: DatasetStatistics - test_statistics: DatasetStatistics - - -class ArtificialDataset: - """ - Parent class of a dataset that can be generated from code. 
- """ - - def __init__(self, freq) -> None: - self.freq = freq - - @property - def metadata(self) -> MetaData: - pass - - @property - def train(self) -> List[DataEntry]: - pass - - @property - def test(self) -> List[DataEntry]: - pass - - # todo return the same type as dataset repo for better usability - def generate(self) -> TrainDatasets: - return TrainDatasets( - metadata=self.metadata, - train=ListDataset(self.train, self.freq), - test=ListDataset(self.test, self.freq), - ) - - -class ConstantDataset(ArtificialDataset): - def __init__( - self, - num_timeseries: int = 10, - num_steps: int = 30, - freq: str = "1H", - start: str = "2000-01-01 00:00:00", - is_nan: bool = False, # Generates constant dataset of 0s with explicit NaN missing values - is_random_constant: bool = False, # Inserts random constant value for each time series - is_different_scales: bool = False, # Generates constants on various scales - is_piecewise: bool = False, # Determines whether the time series in the test - # and train set should have different constant values - is_noise: bool = False, # Determines whether to add Gaussian noise to the constant dataset - is_long: bool = False, # Determines whether some time series will have very long lengths - is_short: bool = False, # Determines whether some time series will have very short lengths - is_trend: bool = False, # Determines whether to add linear trends - num_missing_middle: int = 0, # Number of missing values in the middle of the time series - is_promotions: bool = False, # Determines whether to add promotions to the target time series - # and to store in metadata - holidays: Optional[ - List[pd.Timestamp] - ] = None, # Determines whether to add holidays to the target time series - # and to store in metadata - ) -> None: - super(ConstantDataset, self).__init__(freq) - self.num_timeseries = num_timeseries - self.num_steps = num_steps - self.num_training_steps = self.num_steps // 10 * 8 - self.prediction_length = self.num_steps - 
self.num_training_steps - self.start = start - self.is_nan = is_nan - self.is_random_constant = is_random_constant - self.is_different_scales = is_different_scales - self.is_piecewise = is_piecewise - self.is_noise = is_noise - self.is_long = is_long - self.is_short = is_short - self.is_trend = is_trend - self.num_missing_middle = num_missing_middle - self.is_promotions = is_promotions - self.holidays = holidays - - @property - def metadata(self) -> MetaData: - metadata = MetaData( - freq=self.freq, - feat_static_cat=[ - { - "name": "feat_static_cat_000", - "cardinality": str(self.num_timeseries), - } - ], - feat_static_real=[{"name": "feat_static_real_000"}], - prediction_length=self.prediction_length, - ) - if self.is_promotions or self.holidays: - metadata = MetaData( - freq=self.freq, - feat_static_cat=[ - { - "name": "feat_static_cat_000", - "cardinality": str(self.num_timeseries), - } - ], - feat_static_real=[{"name": "feat_static_real_000"}], - feat_dynamic_real=[BasicFeatureInfo(name=FieldName.FEAT_DYNAMIC_REAL)], - prediction_length=self.prediction_length, - ) - return metadata - - def determine_constant( - self, index: int, constant: Optional[float] = None, seed: int = 1 - ) -> Optional[float]: - if self.is_random_constant: - my_random = random.Random(seed) - constant = (index + 1) * my_random.random() - elif self.is_different_scales: - if index == 0: - constant = 1e-8 - elif constant is not None: - constant *= 100 - else: - constant = float(index) - return constant - - def compute_data_from_recipe( - self, - num_steps: int, - constant: Optional[float] = None, - one_to_zero: float = 0.1, - zero_to_one: float = 0.1, - scale_features: float = 200, - ) -> TrainDatasets: - recipe = [] - recipe_type = Constant(constant) - if self.is_noise: - recipe_type += RandomGaussian() # Use default stddev = 1.0 - if self.is_trend: - recipe_type += LinearTrend() - if self.is_promotions: - recipe.append( - ("binary_causal", BinaryMarkovChain(one_to_zero, zero_to_one)) - ) - 
recipe.append((FieldName.FEAT_DYNAMIC_REAL, Stack(["binary_causal"]))) - recipe_type += scale_features * Lag("binary_causal", lag=0) - if self.holidays: - timestamp = self.init_date() - # Compute dates array - dates = [] - for i in range(num_steps): - dates.append(timestamp) - timestamp += 1 - recipe.append(("binary_holidays", BinaryHolidays(dates, self.holidays))) - recipe.append((FieldName.FEAT_DYNAMIC_REAL, Stack(["binary_holidays"]))) - recipe_type += scale_features * Lag("binary_holidays", lag=0) - recipe.append((FieldName.TARGET, recipe_type)) - max_train_length = num_steps - self.prediction_length - data = RecipeDataset( - recipe=recipe, - metadata=self.metadata, - max_train_length=max_train_length, - prediction_length=self.prediction_length, - num_timeseries=1, # Add 1 time series at a time in the loop for different constant valus per time series - ) - generated = data.generate() - return generated - - def piecewise_constant(self, index: int, num_steps: int) -> List: - target = [] - for j in range(num_steps): - if j < self.num_training_steps: - constant = self.determine_constant(index=index) - else: - constant = self.determine_constant(index=index, seed=2) - target.append(constant) - return target - - def get_num_steps( - self, - index: int, - num_steps_max: int = 10000, - long_freq: int = 4, - num_steps_min: int = 2, - short_freq: int = 4, - ) -> int: - num_steps = self.num_steps - if self.is_long and index % long_freq == 0: - num_steps = num_steps_max - elif self.is_short and index % short_freq == 0: - num_steps = num_steps_min - return num_steps - - def init_date(self) -> pd.Timestamp: - week_dict = { - 0: "MON", - 1: "TUE", - 2: "WED", - 3: "THU", - 4: "FRI", - 5: "SAT", - 6: "SUN", - } - timestamp = pd.Timestamp(self.start) - freq_week_start = self.freq - if freq_week_start == "W": - freq_week_start = f"W-{week_dict[timestamp.weekday()]}" - return pd.Timestamp(self.start, freq=freq_week_start) - - @staticmethod - def insert_nans_and_zeros(ts_len: int) 
-> List: - target = [] - for j in range(ts_len): - # Place NaNs at even indices. Use convention no NaNs before start date. - if j != 0 and j % 2 == 0: - target.append(np.nan) - # Place zeros at odd indices - else: - target.append(0.0) - return target - - def insert_missing_vals_middle( - self, ts_len: int, constant: Optional[float] - ) -> List: - target = [] - lower_bound = (self.num_training_steps - self.num_missing_middle) // 2 - upper_bound = (self.num_training_steps + self.num_missing_middle) // 2 - num_missing_endpts = math.floor(0.1 * self.num_missing_middle) - for j in range(ts_len): - if ( - (0 < j < lower_bound and j % (2 * num_missing_endpts) == 0) - or (lower_bound <= j < upper_bound) - or (j >= upper_bound and j % (2 * num_missing_endpts) == 0) - ): - val = np.nan - else: - val = constant - target.append(val) - return target - - def generate_ts(self, num_ts_steps: int, is_train: bool = False) -> List[DataEntry]: - res = [] - constant = None - for i in range(self.num_timeseries): - if self.is_nan: - target = self.insert_nans_and_zeros(num_ts_steps) - elif self.is_piecewise: - target = self.piecewise_constant(i, num_ts_steps) - else: - constant = self.determine_constant(i, constant) - if self.num_missing_middle > 0: - target = self.insert_missing_vals_middle(num_ts_steps, constant) - elif ( - self.is_noise - or self.is_trend - or self.is_promotions - or self.holidays - ): - - num_steps = self.get_num_steps(i) - generated = self.compute_data_from_recipe(num_steps, constant) - if is_train: - time_series = generated.train - else: - assert generated.test is not None - time_series = generated.test - # returns np array convert to list for consistency - target = list(time_series)[0][FieldName.TARGET].tolist() - else: - target = [constant] * num_ts_steps - ts_data = dict( - start=self.start, - target=target, - item_id=str(i), - feat_static_cat=[i], - feat_static_real=[i], - ) - if self.is_promotions or self.holidays: - ts_data[FieldName.FEAT_DYNAMIC_REAL] = 
list(time_series)[0][ - FieldName.FEAT_DYNAMIC_REAL - ].tolist() - res.append(ts_data) - return res - - @property - def train(self) -> List[DataEntry]: - return self.generate_ts(num_ts_steps=self.num_training_steps, is_train=True) - - @property - def test(self) -> List[DataEntry]: - return self.generate_ts(num_ts_steps=self.num_steps) - - -class ComplexSeasonalTimeSeries(ArtificialDataset): - """ - Generate sinus time series that ramp up and reach a certain amplitude, and - level and have additional spikes on each sunday. - - - TODO: This could be converted to a RecipeDataset to avoid code duplication. - """ - - def __init__( - self, - num_series: int = 100, - prediction_length: int = 20, - freq_str: str = "D", - length_low: int = 30, - length_high: int = 200, - min_val: float = -10000, - max_val: float = 10000, - is_integer: bool = False, - proportion_missing_values: float = 0, - is_noise: bool = True, - is_scale: bool = True, - percentage_unique_timestamps: float = 0.07, - is_out_of_bounds_date: bool = False, - seasonality: Optional[int] = None, - clip_values: bool = False, - ) -> None: - """ - :param num_series: number of time series generated in the train and - test set - :param prediction_length: - :param freq_str: - :param length_low: minimum length of a time-series, must be larger than - prediction_length - :param length_high: maximum length of a time-series - :param min_val: min value of a time-series - :param max_val: max value of a time-series - :param is_integer: whether the dataset has integers or not - :param proportion_missing_values: - :param is_noise: whether to add noise - :param is_scale: whether to add scale - :param percentage_unique_timestamps: percentage of random start dates bounded between 0 and 1 - :param is_out_of_bounds_date: determines whether to use very old start dates and start dates far in the future - :param seasonality: Seasonality of the generated data. 
If not given uses default seasonality for frequency - :param clip_values: if True the values will be clipped to [min_val, max_val], otherwise linearly scales them - """ - assert length_low > prediction_length - super(ComplexSeasonalTimeSeries, self).__init__(freq_str) - self.num_series = num_series - self.prediction_length = prediction_length - self.length_low = length_low - self.length_high = length_high - self.freq_str = freq_str - self.min_val = min_val - self.max_val = max_val - self.is_integer = is_integer - self.proportion_missing_values = proportion_missing_values - self.is_noise = is_noise - self.is_scale = is_scale - self.percentage_unique_timestamps = percentage_unique_timestamps - self.is_out_of_bounds_date = is_out_of_bounds_date - self.seasonality = seasonality - self.clip_values = clip_values - - @property - def metadata(self) -> MetaData: - return MetaData(freq=self.freq, prediction_length=self.prediction_length) - - def _get_period(self) -> int: - if self.seasonality is not None: - return self.seasonality - if self.freq_str == "M": - return 24 - elif self.freq_str == "W": - return 52 - elif self.freq_str == "D": - return 14 - elif self.freq_str == "H": - return 24 - elif self.freq_str == "min": - return 60 - else: - raise RuntimeError() - - def _get_start(self, index: int, my_random: random.Random) -> str: - if ( - self.is_out_of_bounds_date and index == 0 - ): # Add edge case of dates out of normal bounds past date - start_y, start_m, start_d = ( - 1690, - 2, - 7, - ) # Pandas doesn't allot before 1650 - start_h, start_min = 18, 36 - elif ( - self.is_out_of_bounds_date and index == self.num_series - 1 - ): # Add edge case of dates out of normal bounds future date - start_y, start_m, start_d = ( - 2030, - 6, - 3, - ) # Pandas doesn't allot before 1650 - start_h, start_min = 18, 36 - # assume that only 100 * percentage_unique_timestamps of timestamps are unique - elif my_random.random() < self.percentage_unique_timestamps: - start_y = 
my_random.randint(2000, 2018) - start_m = my_random.randint(1, 12) - start_d = my_random.randint(1, 28) - start_h = my_random.randint(0, 23) - start_min = my_random.randint(0, 59) - else: - start_y, start_m, start_d = 2013, 11, 28 - start_h, start_min = 18, 36 - - if self.freq_str == "M": - return "%04.d-%02.d" % (start_y, start_m) - elif self.freq_str in ["W", "D"]: - return "%04.d-%02.d-%02.d" % (start_y, start_m, start_d) - elif self.freq_str == "H": - return "%04.d-%02.d-%02.d %02.d:00:00" % ( - start_y, - start_m, - start_d, - start_h, - ) - else: - return "%04.d-%02.d-%02.d %02.d:%02.d:00" % ( - start_y, - start_m, - start_d, - start_h, - start_min, - ) - - def _special_time_point_indicator(self, index) -> bool: - if self.freq_str == "M": - return index.month == 1 - elif self.freq_str == "W": - return index.month % 2 == 0 - elif self.freq_str == "D": - return index.dayofweek == 0 - elif self.freq_str == "H": - return index.hour == 0 - elif self.freq_str == "min": - return index.minute % 30 == 0 - else: - raise RuntimeError(f'Bad freq_str value "{index}"') - - @property - def train(self) -> List[DataEntry]: - return [ - dict( - start=ts[FieldName.START], - target=ts[FieldName.TARGET][: -self.prediction_length], - item_id=ts[FieldName.ITEM_ID], - ) - for ts in self.make_timeseries() - ] - - @property - def test(self) -> List[DataEntry]: - return self.make_timeseries() - - def make_timeseries(self, seed: int = 1) -> List[DataEntry]: - res = [] - # Fix seed so that the training set is the same - # as the test set from 0:self.prediction_length for the two independent calls - - def sigmoid(x: np.ndarray) -> np.ndarray: - return 1.0 / (1.0 + np.exp(-x)) - - # Ensure same start dates in test and training set - my_random = random.Random(seed) - state = np.random.RandomState(seed) - for i in range(self.num_series): - val_range = self.max_val - self.min_val - length = state.randint(low=self.length_low, high=self.length_high) - start = self._get_start(i, my_random) - 
envelope = sigmoid((np.arange(length) - 20.0) / 10.0) - level = 0.3 * val_range * (state.random_sample() - 0.5) - phi = 2 * np.pi * state.random_sample() - period = self._get_period() - w = 2 * np.pi / period - t = np.arange(length) - idx = pd.date_range(start=start, freq=self.freq_str, periods=length) - special_tp_indicator = self._special_time_point_indicator(idx) - sunday_effect = state.random_sample() * special_tp_indicator - v = np.sin(w * t + phi) + sunday_effect - - if self.is_scale: - scale = 0.1 * val_range * state.random_sample() - v *= scale - v += level - if self.is_noise: - noise_range = 0.02 * val_range * state.random_sample() - noise = noise_range * state.normal(size=length) - v += noise - v = envelope * v - if self.clip_values: - np.clip(v, a_min=self.min_val, a_max=self.max_val, out=v) - else: - """ - Rather than mapping [v_min, v_max] to [self.min_val, self.max_val] which would lead to - all the time series having the same min and max, we want to keep the same interval length - (v_max - v_min). We thus shift the interval [v_min, v_max] in [self.min_val, self.max_val] - and clip it if needed. 
- """ - v_min, v_max = v.min(), v.max() - p_min, p_max = ( - max(self.min_val, v_min), - min(self.max_val, v_max), - ) - shifted_min = np.clip( - p_min + (p_max - v_max), a_min=self.min_val, a_max=self.max_val, - ) - shifted_max = np.clip( - p_max + (p_min - v_min), a_min=self.min_val, a_max=self.max_val, - ) - v = shifted_min + (shifted_max - shifted_min) * (v - v_min) / ( - v_max - v_min - ) - - if self.is_integer: - np.clip( - v, a_min=np.ceil(self.min_val), a_max=np.floor(self.max_val), out=v, - ) - v = np.round(v).astype(int) - v = list(v.tolist()) - if self.proportion_missing_values > 0: - assert ( - self.proportion_missing_values < 1.0 - ), "Please chose a number 0 < x < 1.0" - idx = np.arange(len(v)) - state.shuffle(idx) - num_missing_values = ( - int(len(v) * self.proportion_missing_values) + 1 - ) # Add one in case this gets zero - missing_idx = idx[:num_missing_values] - for j in missing_idx: - # Using convention that there are no missing values before the start date. - if j != 0: - v[j] = None if state.rand() < 0.5 else "NaN" - res.append( - dict( - start=pd.Timestamp(start, freq=self.freq_str), - target=np.array(v), - item_id=i, - ) - ) - return res - - -class RecipeDataset(ArtificialDataset): - """Synthetic data set generated by providing a recipe. - - A recipe is either a (non-deterministic) function - - f(length: int, global_state: dict) -> dict - - or list of (field, function) tuples of the form - - (field: str, f(data: dict, length: int, global_state: dict) -> dict) - - which is processed sequentially, with data initially set to {}, - and each entry updating data[field] to the output of the function - call. 
- """ - - def __init__( - self, - recipe: Union[Callable, List[Tuple[str, Callable]]], - metadata: MetaData, - max_train_length: int, - prediction_length: int, - num_timeseries: int, - trim_length_fun=lambda x, **kwargs: 0, - data_start=pd.Timestamp("2014-01-01"), - ) -> None: - """ - - :param recipe: The recipe to generate from (see class docstring) - :param metadata: The metadata to be included in the dataset - :param max_train_length: The maximum length of a training time series. - :param prediction_length: The length of the prediction range - :param num_timeseries: Number of time series to generate - :param trim_length_fun: Callable f(x: int) -> int returning the - (shortened) training length - :param data_start: Start date for the data set - """ - super().__init__(freq=metadata.freq) - - self.recipe = recipe - self._metadata = metadata - self.max_train_length = max_train_length - self.prediction_length = prediction_length - self.trim_length_fun = trim_length_fun - self.num_timeseries = num_timeseries - self.data_start = pd.Timestamp(data_start, freq=self._metadata.freq) - - @property - def metadata(self) -> MetaData: - return self._metadata - - def dataset_info(self, train_ds: Dataset, test_ds: Dataset) -> DatasetInfo: - return DatasetInfo( - name=f"RecipeDataset({repr(self.recipe)})", - metadata=self.metadata, - prediction_length=self.prediction_length, - train_statistics=calculate_dataset_statistics(train_ds), - test_statistics=calculate_dataset_statistics(test_ds), - ) - - @staticmethod - def trim_ts_item_end(x: DataEntry, length: int) -> DataEntry: - """Trim a TimeSeriesItem into a training range, by removing - the last prediction_length time points from the target and dynamic - features.""" - y = dict( - item_id=x[FieldName.ITEM_ID], - start=x[FieldName.START], - target=x[FieldName.TARGET][:-length], - ) - - if FieldName.FEAT_DYNAMIC_CAT in x: - y[FieldName.FEAT_DYNAMIC_CAT] = x[FieldName.FEAT_DYNAMIC_CAT][:, :-length] - if FieldName.FEAT_DYNAMIC_REAL in 
x: - y[FieldName.FEAT_DYNAMIC_REAL] = x[FieldName.FEAT_DYNAMIC_REAL][:, :-length] - return y - - @staticmethod - def trim_ts_item_front(x: DataEntry, length: int) -> DataEntry: - """Trim a TimeSeriesItem into a training range, by removing - the first offset_front time points from the target and dynamic - features.""" - assert length <= len(x[FieldName.TARGET]) - - y = dict( - item_id=x[FieldName.ITEM_ID], - start=x[FieldName.START] + length * x[FieldName.START].freq, - target=x[FieldName.TARGET][length:], - ) - - if FieldName.FEAT_DYNAMIC_CAT in x: - y[FieldName.FEAT_DYNAMIC_CAT] = x[FieldName.FEAT_DYNAMIC_CAT][:, length:] - if FieldName.FEAT_DYNAMIC_REAL in x: - y[FieldName.FEAT_DYNAMIC_REAL] = x[FieldName.FEAT_DYNAMIC_REAL][:, length:] - return y - - def generate(self) -> TrainDatasets: - metadata = self.metadata - data_it = generate( - length=self.max_train_length + self.prediction_length, - recipe=self.recipe, - start=self.data_start, - ) - full_length_data = take_as_list(data_it, self.num_timeseries) - - test_data = [ - RecipeDataset.trim_ts_item_front( - x, self.trim_length_fun(x, train_length=self.max_train_length) - ) - for x in full_length_data - ] - train_data = [ - RecipeDataset.trim_ts_item_end(x, self.prediction_length) for x in test_data - ] - return TrainDatasets( - metadata=metadata, - train=ListDataset(train_data, metadata.freq), - test=ListDataset(test_data, metadata.freq), - ) - - -def default_synthetic() -> Tuple[DatasetInfo, Dataset, Dataset]: - - recipe = [ - (FieldName.TARGET, LinearTrend() + RandomGaussian()), - (FieldName.FEAT_STATIC_CAT, RandomCat([10])), - ( - FieldName.FEAT_STATIC_REAL, - ForEachCat(RandomGaussian(1, (10,)), FieldName.FEAT_STATIC_CAT) - + RandomGaussian(0.1, (10,)), - ), - ] - - data = RecipeDataset( - recipe=recipe, - metadata=MetaData( - freq="D", - feat_static_real=[BasicFeatureInfo(name=FieldName.FEAT_STATIC_REAL)], - feat_static_cat=[ - CategoricalFeatureInfo(name=FieldName.FEAT_STATIC_CAT, cardinality=10) - ], - 
feat_dynamic_real=[BasicFeatureInfo(name=FieldName.FEAT_DYNAMIC_REAL)], - ), - max_train_length=20, - prediction_length=10, - num_timeseries=10, - trim_length_fun=lambda x, **kwargs: np.minimum( - int(np.random.geometric(1 / (kwargs["train_length"] / 2))), - kwargs["train_length"], - ), - ) - - generated = data.generate() - assert generated.test is not None - info = data.dataset_info(generated.train, generated.test) - - return info, generated.train, generated.test - - -def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]: - metadata = MetaData( - freq="1H", - feat_static_cat=[ - CategoricalFeatureInfo(name="feat_static_cat_000", cardinality="10") - ], - feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")], - ) - - start_date = "2000-01-01 00:00:00" - - train_ds = ListDataset( - data_iter=[ - { - FieldName.ITEM_ID: str(i), - FieldName.START: start_date, - FieldName.TARGET: [float(i)] * 24, - FieldName.FEAT_STATIC_CAT: [i], - FieldName.FEAT_STATIC_REAL: [float(i)], - } - for i in range(10) - ], - freq=metadata.freq, - ) - - test_ds = ListDataset( - data_iter=[ - { - FieldName.ITEM_ID: str(i), - FieldName.START: start_date, - FieldName.TARGET: [float(i)] * 30, - FieldName.FEAT_STATIC_CAT: [i], - FieldName.FEAT_STATIC_REAL: [float(i)], - } - for i in range(10) - ], - freq=metadata.freq, - ) - - info = DatasetInfo( - name="constant_dataset", - metadata=metadata, - prediction_length=6, - train_statistics=calculate_dataset_statistics(train_ds), - test_statistics=calculate_dataset_statistics(test_ds), - ) - - return info, train_ds, test_ds - - -def generate_sf2( - filename: str, time_series: List, is_missing: bool, num_missing: int -) -> None: - # This function generates the test and train json files which will be converted to csv format - if not os.path.exists(os.path.dirname(filename)): - os.makedirs(os.path.dirname(filename)) - with open(filename, "w") as json_file: - for ts in time_series: - if is_missing: - target = [] # type: List - # For 
Forecast don't output feat_static_cat and feat_static_real - for j, val in enumerate(ts[FieldName.TARGET]): - # only add ones that are not missing - if j != 0 and j % num_missing == 0: - target.append(None) - else: - target.append(val) - ts[FieldName.TARGET] = target - ts.pop(FieldName.FEAT_STATIC_CAT, None) - ts.pop(FieldName.FEAT_STATIC_REAL, None) - # Chop features in training set - if FieldName.FEAT_DYNAMIC_REAL in ts.keys() and "train" in filename: - # TODO: Fix for missing values - for i, feat_dynamic_real in enumerate(ts[FieldName.FEAT_DYNAMIC_REAL]): - ts[FieldName.FEAT_DYNAMIC_REAL][i] = feat_dynamic_real[ - : len(ts[FieldName.TARGET]) - ] - json.dump(ts, json_file) - json_file.write("\n") diff --git a/pts/dataset/common.py b/pts/dataset/common.py deleted file mode 100644 index 25b876e..0000000 --- a/pts/dataset/common.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - - -from typing import Any, Dict, Iterable, NamedTuple, List, Optional - -import pandas as pd -from pydantic import BaseModel - -# Dictionary used for data flowing through the transformations. -DataEntry = Dict[str, Any] - -# A Dataset is an iterable of DataEntry. -Dataset = Iterable[DataEntry] - - -class SourceContext(NamedTuple): - source: str - row: int - - -class FieldName: - """ - A bundle of default field names to be used by clients when instantiating - transformer instances. 
- """ - - ITEM_ID = "item_id" - - START = "start" - TARGET = "target" - - FEAT_STATIC_CAT = "feat_static_cat" - FEAT_STATIC_REAL = "feat_static_real" - FEAT_DYNAMIC_CAT = "feat_dynamic_cat" - FEAT_DYNAMIC_REAL = "feat_dynamic_real" - - FEAT_TIME = "time_feat" - FEAT_CONST = "feat_dynamic_const" - FEAT_AGE = "feat_dynamic_age" - - OBSERVED_VALUES = "observed_values" - IS_PAD = "is_pad" - FORECAST_START = "forecast_start" - - -class CategoricalFeatureInfo(BaseModel): - name: str - cardinality: str - - -class BasicFeatureInfo(BaseModel): - name: str - - -class MetaData(BaseModel): - freq: str = None - target: Optional[BasicFeatureInfo] = None - - feat_static_cat: List[CategoricalFeatureInfo] = [] - feat_static_real: List[BasicFeatureInfo] = [] - feat_dynamic_real: List[BasicFeatureInfo] = [] - feat_dynamic_cat: List[CategoricalFeatureInfo] = [] - - prediction_length: Optional[int] = None - - -class TrainDatasets(NamedTuple): - """ - A dataset containing two subsets, one to be used for training purposes, - and the other for testing purposes, as well as metadata. - """ - - metadata: MetaData - train: Dataset - test: Optional[Dataset] = None - - -class DateConstants: - """ - Default constants for specific dates. - """ - - OLDEST_SUPPORTED_TIMESTAMP = pd.Timestamp(1800, 1, 1, 12) - LATEST_SUPPORTED_TIMESTAMP = pd.Timestamp(2200, 1, 1, 12) diff --git a/pts/dataset/file_dataset.py b/pts/dataset/file_dataset.py deleted file mode 100644 index e48d8bf..0000000 --- a/pts/dataset/file_dataset.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. 
This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - - -import functools -import glob -import random -from pathlib import Path -from typing import Iterator, List -from typing import NamedTuple - -import rapidjson as json - -from .common import Dataset, DataEntry, SourceContext -from .process import ProcessDataEntry - - -def load(file_obj): - for line in file_obj: - yield json.loads(line) - - -class Span(NamedTuple): - path: Path - line: int - - -class Line(NamedTuple): - content: object - span: Span - - -class JsonLinesFile: - """ - An iterable type that draws from a JSON Lines file. - - Parameters - ---------- - path - Path of the file to load data from. This should be a valid - JSON Lines file. - """ - - def __init__(self, path: Path, shuffle: bool = True) -> None: - self.path = path - self.shuffle = shuffle - - def __iter__(self): - with open(self.path) as jsonl_file: - lines = jsonl_file.read().splitlines() - if self.shuffle: - random.shuffle(lines) - - for line_number, raw in enumerate(lines, start=1): - span = Span(path=self.path, line=line_number) - try: - yield Line(json.loads(raw), span=span) - except ValueError: - raise Exception(f"Could not read json line {line_number}, {raw}") - - def __len__(self): - # 1MB - BUF_SIZE = 1024 ** 2 - - with open(self.path) as file_obj: - read_chunk = functools.partial(file_obj.read, BUF_SIZE) - return sum(chunk.count("\n") for chunk in iter(read_chunk, "")) - - -class FileDataset(Dataset): - """ - Dataset that loads JSON Lines files contained in a path. - - Parameters - ---------- - path - Return list of path names that match path. Each file is considered - and should be valid. A valid line in a file can be for - instance: {"start": "2014-09-07", "target": [0.1, 0.2]}. - freq - Frequency of the observation in the time series. 
- Must be a valid Pandas frequency. - one_dim_target - Whether to accept only univariate target time series. - shuffle - Whether to shuffle the time series when making the batches - """ - - def __init__( - self, path: Path, freq: str, one_dim_target: bool = True, shuffle: bool = False - ) -> None: - self.shuffle = shuffle - self.path = path - self.process = ProcessDataEntry(freq, one_dim_target=one_dim_target) - if not self.files(): - raise OSError(f"no valid file found via {path}") - - def __iter__(self) -> Iterator[DataEntry]: - for path in self.files(): - for line in JsonLinesFile(path, self.shuffle): - data = self.process(line.content) - data["source"] = SourceContext( - source=line.span.path, row=line.span.line - ) - yield data - - def __len__(self): - return sum([len(JsonLinesFile(path)) for path in self.files()]) - - def files(self) -> List[Path]: - """ - List the files that compose the dataset. - - Returns - ------- - List[Path] - List of the paths of all files composing the dataset. - """ - files = glob.glob(str(self.path)) - if self.shuffle: - random.shuffle(files) - return files - diff --git a/pts/dataset/list_dataset.py b/pts/dataset/list_dataset.py deleted file mode 100644 index eae2b00..0000000 --- a/pts/dataset/list_dataset.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- - -import random -from typing import Iterable - -from .common import DataEntry, Dataset, SourceContext -from .process import ProcessDataEntry - - -class ListDataset(Dataset): - def __init__( - self, - data_iter: Iterable[DataEntry], - freq: str, - one_dim_target: bool = True, - shuffle: bool = False, - ) -> None: - process = ProcessDataEntry(freq, one_dim_target) - self.list_data = [process(data) for data in data_iter] - if shuffle: - random.shuffle(self.list_data) - - def __iter__(self): - source_name = "list_data" - for row_number, data in enumerate(self.list_data, start=1): - data["source"] = SourceContext(source=source_name, row=row_number) - yield data - - def __len__(self): - return len(self.list_data) diff --git a/pts/dataset/loader.py b/pts/dataset/loader.py deleted file mode 100644 index bcc520f..0000000 --- a/pts/dataset/loader.py +++ /dev/null @@ -1,224 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- - -import itertools -from collections import defaultdict -from typing import Any, Dict, Iterable, Iterator, List, Optional # noqa: F401 - -import numpy as np -# Third-party imports -import torch - -from pts.transform.transform import Transformation -# First-party imports -from .common import DataEntry, Dataset - -DataBatch = Dict[str, Any] - - -class BatchBuffer: - def __init__( - self, batch_size: int, device: torch.device, dtype: np.dtype = np.float32 - ) -> None: - self._buffers: Dict[Any, List[Any]] = defaultdict(list) - self.batch_size = batch_size - self._size = 0 - self.device = device - self.dtype = dtype - - def add(self, d: Dict[str, List[np.ndarray]]): - if self._buffers: - assert self._buffers.keys() == d.keys() - for k, v in d.items(): - self._buffers[k].append(v) - self._size += 1 - - def __len__(self): - return self._size - - def next_batch(self) -> DataBatch: - assert self._size > 0 - n = min(self._size, self.batch_size) - batch = {k: self.stack(v[:n]) for k, v in self._buffers.items()} - for key in self._buffers.keys(): - self._buffers[key] = self._buffers[key][n:] - self._size -= n - return batch - - def stack(self, xs): - if isinstance(xs[0], np.ndarray): - data = np.asarray(xs) - if data.dtype.kind == "f": - data = data.astype(self.dtype) - return torch.from_numpy(data).to(device=self.device, non_blocking=True) - elif isinstance(xs[0], torch.Tensor): - return torch.stack(*xs) - else: - return xs # stack all other types as list - - def shuffle(self): - perm = np.random.permutation(self._size) - for key in self._buffers.keys(): - li = self._buffers[key] - self._buffers[key] = [li[i] for i in perm] - - -class DataLoader(Iterable[DataEntry]): - """ - An abstract Iterable type for iterating and transforming a dataset, - in batches of a prescribed size. - - Parameters - ---------- - dataset - The dataset from which to load data. - transform - A transformation to apply to each entry in the dataset. - batch_size - The size of the batches to emit. 
- device - device to use to store data on. - dtype - Floating point type to use. - """ - - def __init__( - self, - dataset: Dataset, - transform: Transformation, - batch_size: int, - device: torch.device, - dtype: np.dtype = np.float32, - ) -> None: - self.dataset = dataset - self.transform = transform - self.batch_size = batch_size - self.device = device - self.dtype = dtype - - -class TrainDataLoader(DataLoader): - """ - An Iterable type for iterating and transforming a dataset, in batches of a - prescribed size, until a given number of batches is reached. - - The transformation are applied with in training mode, i.e. with the flag - `is_train = True`. - - Parameters - ---------- - dataset - The dataset from which to load data. - transform - A transformation to apply to each entry in the dataset. - batch_size - The size of the batches to emit. - device - device to use to store data on. - num_batches_per_epoch - Number of batches to return in one complete iteration over this object. - dtype - Floating point type to use. 
- """ - - def __init__( - self, - dataset: Dataset, - transform: Transformation, - batch_size: int, - device: torch.device, - num_batches_per_epoch: int, - dtype: np.dtype = np.float32, - shuffle_for_training: bool = True, - num_batches_for_shuffling: int = 10, - ) -> None: - super().__init__(dataset, transform, batch_size, device, dtype) - self.num_batches_per_epoch = num_batches_per_epoch - self.shuffle_for_training = shuffle_for_training - self._num_buffered_batches = ( - num_batches_for_shuffling if shuffle_for_training else 1 - ) - self._cur_iter: Optional[Iterator] = None - self._buffer = BatchBuffer(self.batch_size, device, dtype) - - def _emit_batches_while_buffer_larger_than(self, thresh) -> Iterator[DataBatch]: - if self.shuffle_for_training: - self._buffer.shuffle() - while len(self._buffer) > thresh: - yield self._buffer.next_batch() - - def _iterate_forever(self, collection: Iterable[DataEntry]) -> Iterator[DataEntry]: - # iterate forever over the collection, the collection must be non empty - while True: - try: - first = next(iter(collection)) - except StopIteration: - raise Exception("empty dataset") - else: - for x in itertools.chain([first], collection): - yield x - - def __len__(self) -> int: - return self.num_batches_per_epoch - - def __iter__(self) -> Iterator[DataBatch]: - batch_count = 0 - if self._cur_iter is None: - self._cur_iter = self.transform( - self._iterate_forever(self.dataset), is_train=True - ) - assert self._cur_iter is not None - while True: - data_entry = next(self._cur_iter) - self._buffer.add(data_entry) - if len(self._buffer) >= self._num_buffered_batches * self.batch_size: - for batch in self._emit_batches_while_buffer_larger_than( - self.batch_size - 1 - ): - yield batch - batch_count += 1 - if batch_count >= self.num_batches_per_epoch: - return - - -class InferenceDataLoader(DataLoader): - """ - An Iterable type for iterating and transforming a dataset just once, in - batches of a prescribed size. 
- - The transformation are applied with in inference mode, i.e. with the flag - `is_train = False`. - - Parameters - ---------- - dataset - The dataset from which to load data. - transform - A transformation to apply to each entry in the dataset. - batch_size - The size of the batches to emit. - device - device to use to store data on. - dtype - Floating point type to use. - """ - - def __iter__(self) -> Iterator[DataBatch]: - buffer = BatchBuffer(self.batch_size, self.device, self.dtype) - for data_entry in self.transform(iter(self.dataset), is_train=False): - buffer.add(data_entry) - if len(buffer) >= self.batch_size: - yield buffer.next_batch() - if len(buffer) > 0: - yield buffer.next_batch() diff --git a/pts/dataset/multivariate_grouper.py b/pts/dataset/multivariate_grouper.py deleted file mode 100644 index 2f12b48..0000000 --- a/pts/dataset/multivariate_grouper.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -# Standard library imports -import logging -from typing import Callable, Optional - -import numpy as np -import pandas as pd - -# First-party imports -from .common import DataEntry, Dataset, FieldName, DateConstants -from .list_dataset import ListDataset - - -class MultivariateGrouper: - """ - The MultivariateGrouper takes a univariate dataset and groups it into a - single multivariate time series. 
Therefore, this class allows the user - to convert a univariate dataset into a multivariate dataset without making - a separate copy of the dataset. - - The Multivariate Grouper has two different modes: - - Training: For training data, the univariate time series get aligned to the - earliest time stamp in the dataset. Time series will be left and right - padded to produce an array of shape (dim, num_time_steps) - - Test: The test dataset might have multiple start dates (usually because - the test dataset mimics a rolling evaluation scenario). In this case, - the univariate dataset will be split into n multivariate time series, - where n is the number of evaluation dates. Again, the - time series will be grouped but only left padded. Note that the - padded value will influence the prediction if the context length is - longer than the length of the time series. - - Rules for padding for training and test datasets can be specified by the - user. - - Parameters - ---------- - max_target_dim - Set maximum dimensionality (for faster testing or when hitting - constraints of multivariate model). Takes the last max_target_dim - time series and groups them to multivariate time series. - num_test_dates - Number of test dates in the test set. This can be more than one if - the test set contains more than one forecast start date (often the - case in a rolling evaluation scenario). Must be set to convert test - data. - train_fill_rule - Implements the rule that fills missing data after alignment of the - time series for the training dataset. - test_fill_rule - Implements the rule that fills missing data after alignment of the - time series for the test dataset. 
- - """ - - def __init__( - self, - max_target_dim: Optional[int] = None, - num_test_dates: Optional[int] = None, - train_fill_rule: Callable = np.mean, - test_fill_rule: Callable = lambda x: 0.0, - ) -> None: - self.num_test_dates = num_test_dates - self.max_target_dimension = max_target_dim - self.train_fill_function = train_fill_rule - self.test_fill_rule = test_fill_rule - - self.first_timestamp = DateConstants.LATEST_SUPPORTED_TIMESTAMP - self.last_timestamp = DateConstants.OLDEST_SUPPORTED_TIMESTAMP - self.frequency = "" - - def __call__(self, dataset: Dataset) -> Dataset: - self._preprocess(dataset) - return self._group_all(dataset) - - def _preprocess(self, dataset: Dataset) -> None: - """ - The preprocess function iterates over the dataset to gather data that - is necessary for alignment. - This includes - 1) Storing first/last timestamp in the dataset - 2) Storing the frequency of the dataset - """ - for data in dataset: - timestamp = data[FieldName.START] - self.first_timestamp = min(self.first_timestamp, timestamp) - self.last_timestamp = max( - self.last_timestamp, - timestamp + (len(data[FieldName.TARGET]) - 1) * timestamp.freq, - ) - self.frequency = timestamp.freq - logging.info( - f"first/last timestamp found: " - f"{self.first_timestamp}/{self.last_timestamp}" - ) - - def _group_all(self, dataset: Dataset) -> Dataset: - if self.num_test_dates is None: - grouped_dataset = self._prepare_train_data(dataset) - else: - grouped_dataset = self._prepare_test_data(dataset) - return grouped_dataset - - def _prepare_train_data(self, dataset: Dataset) -> ListDataset: - logging.info("group training time-series to datasets") - - grouped_data = self._transform_target(self._align_data_entry, dataset) - grouped_data = self._restrict_max_dimensionality(grouped_data) - grouped_data[FieldName.START] = self.first_timestamp - grouped_data[FieldName.FEAT_STATIC_CAT] = [0] - - return ListDataset([grouped_data], freq=self.frequency, one_dim_target=False) - - def 
_prepare_test_data(self, dataset: Dataset) -> ListDataset: - logging.info("group test time-series to datasets") - - grouped_data = self._transform_target(self._left_pad_data, dataset) - # splits test dataset with rolling date into N R^d time series where - # N is the number of rolling evaluation dates - split_dataset = np.split(grouped_data[FieldName.TARGET], self.num_test_dates) - - all_entries = list() - for dataset_at_test_date in split_dataset: - grouped_data = dict() - grouped_data[FieldName.TARGET] = np.array( - list(dataset_at_test_date), dtype=np.float32 - ) - grouped_data = self._restrict_max_dimensionality(grouped_data) - grouped_data[FieldName.START] = self.first_timestamp - grouped_data[FieldName.FEAT_STATIC_CAT] = [0] - all_entries.append(grouped_data) - - return ListDataset( - all_entries, freq=self.frequency, one_dim_target=False - ) - - def _align_data_entry(self, data: DataEntry) -> np.array: - ts = self.to_ts(data) - return ts.reindex( - pd.date_range( - start=self.first_timestamp, - end=self.last_timestamp, - freq=data[FieldName.START].freq, - ), - fill_value=self.train_fill_function(ts), - ).values - - def _left_pad_data(self, data: DataEntry) -> np.array: - ts = self.to_ts(data) - return ts.reindex( - pd.date_range( - start=self.first_timestamp, - end=ts.index[-1], - freq=data[FieldName.START].freq, - ), - fill_value=self.test_fill_rule(ts), - ).values - - @staticmethod - def _transform_target(funcs, dataset: Dataset) -> DataEntry: - return {FieldName.TARGET: np.array([funcs(data) for data in dataset])} - - def _restrict_max_dimensionality(self, data: DataEntry) -> DataEntry: - """ - Takes the last max_target_dimension dimensions from a multivariate - dataentry. 
- - Parameters - ---------- - data - multivariate data entry with (dim, num_timesteps) target field - - Returns - ------- - DataEntry - data multivariate data entry with - (max_target_dimension, num_timesteps) target field - """ - - if self.max_target_dimension is not None: - # restrict maximum dimensionality (for faster testing) - data[FieldName.TARGET] = data[FieldName.TARGET][ - -self.max_target_dimension :, : - ] - return data - - @staticmethod - def to_ts(data: DataEntry) -> pd.Series: - return pd.Series( - data[FieldName.TARGET], - index=pd.date_range( - start=data[FieldName.START], - periods=len(data[FieldName.TARGET]), - freq=data[FieldName.START].freq, - ), - ) - diff --git a/pts/dataset/process.py b/pts/dataset/process.py deleted file mode 100644 index 08c97d6..0000000 --- a/pts/dataset/process.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- - -from functools import lru_cache -from typing import Callable, List, cast - -import numpy as np -import pandas as pd -from pandas.tseries.offsets import Tick - -from .common import DataEntry - - -class ProcessStartField: - def __init__(self, name: str, freq: str) -> None: - self.name = name - self.freq = freq - - def __call__(self, data: DataEntry) -> DataEntry: - try: - value = ProcessStartField.process(data[self.name], self.freq) - except (TypeError, ValueError) as e: - raise Exception(f'Error "{e}" occurred when reading field "{self.name}"') - - data[self.name] = value - - return data - - @staticmethod - @lru_cache(maxsize=10000) - def process(string: str, freq: str) -> pd.Timestamp: - timestamp = pd.Timestamp(string, freq=freq) - - # operate on time information (days, hours, minute, second) - if isinstance(timestamp.freq, Tick): - return pd.Timestamp(timestamp.floor(timestamp.freq), timestamp.freq) - - # since we are only interested in the data piece, we normalize the - # time information - timestamp = timestamp.replace( - hour=0, minute=0, second=0, microsecond=0, nanosecond=0 - ) - - return timestamp.freq.rollforward(timestamp) - - -class ProcessTimeSeriesField: - def __init__(self, name, is_required: bool, is_static: bool, is_cat: bool) -> None: - self.name = name - self.is_required = is_required - self.req_ndim = 1 if is_static else 2 - self.dtype = np.int64 if is_cat else np.float32 - - def __call__(self, data: DataEntry) -> DataEntry: - value = data.get(self.name, None) - - if value is not None: - value = np.asarray(value, dtype=self.dtype) - dim_diff = self.req_ndim - value.ndim - if dim_diff == 1: - value = np.expand_dims(a=value, axis=0) - elif dim_diff != 0: - raise Exception( - f"JSON array has bad shape - expected {self.req_ndim} dimensions got {dim_diff}" - ) - - data[self.name] = value - return data - elif not self.is_required: - return data - else: - raise Exception(f"JSON object is missing a required field `{self.name}`") - - -class 
ProcessDataEntry: - def __init__(self, freq: str, one_dim_target: bool = True) -> None: - self.trans = cast( - List[Callable[[DataEntry], DataEntry]], - [ - ProcessStartField("start", freq=freq), - ProcessTimeSeriesField( - "target", is_required=True, is_cat=False, is_static=one_dim_target - ), - ProcessTimeSeriesField( - "feat_dynamic_cat", is_required=False, is_cat=True, is_static=False - ), - ProcessTimeSeriesField( - "feat_dynamic_real", - is_required=False, - is_cat=False, - is_static=False, - ), - ProcessTimeSeriesField( - "feat_static_cat", is_required=False, is_cat=True, is_static=True - ), - ProcessTimeSeriesField( - "feat_static_real", is_required=False, is_cat=False, is_static=True - ), - ], - ) - - def __call__(self, data: DataEntry) -> DataEntry: - for t in self.trans: - data = t(data) - return data diff --git a/pts/dataset/recipe.py b/pts/dataset/recipe.py deleted file mode 100644 index 943c3de..0000000 --- a/pts/dataset/recipe.py +++ /dev/null @@ -1,604 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- -# Standard library imports -import functools -import itertools -import operator -from typing import ( - Any, - Callable, - Dict, - Iterator, - List, - Optional, - Sequence, - Tuple, - Union, -) - -# Third-party imports -import numpy as np -import pandas as pd - -# First-party imports -from .common import DataEntry - -ValueOrCallable = Union[Any, Callable] -Recipe = List[Tuple[str, Callable]] -Env = Dict[str, Any] - - -def resolve(val_or_callable: ValueOrCallable, context: Env, *args, **kwargs): - if callable(val_or_callable): - return val_or_callable(context, *args, **kwargs) - elif isinstance(val_or_callable, str): - return context[val_or_callable] - else: - return val_or_callable - - -def generate( - length: int, - recipe: Union[Callable, Recipe], - start: pd.Timestamp, - global_state: Optional[dict] = None, - seed: int = 0, - item_id_prefix: str = "", -) -> Iterator[DataEntry]: - np.random.seed(seed) - - if global_state is None: - global_state = {} - - if isinstance(recipe, list): - for x in itertools.count(): - data: DataEntry = {} - for k, f in recipe: - data[k] = resolve( - f, data, length=length, field_name=k, global_state=global_state, - ) - yield dict(**data, item_id=item_id_prefix + str(x), start=start) - else: - assert callable(recipe) - for x in itertools.count(): - data = recipe(length=length, global_state=global_state) - yield dict(**data, item_id=item_id_prefix + str(x), start=start) - - -def evaluate( - funcs: Recipe, length: int, *args, global_state: dict = None, **kwargs -) -> Env: - if global_state is None: - global_state = {} - - if "length" in kwargs: - del kwargs["length"] - if "field_name" in kwargs: - del kwargs["field_name"] - if "global_state" in kwargs: - del kwargs["global_state"] - - data: DataEntry = {} - for k, f in funcs: - try: - data[k] = resolve( - f, - data, - length=length, - field_name=k, - global_state=global_state, - *args, - **kwargs - ) - except ValueError as e: - raise ValueError('Error while evaluating key 
"{}"'.format(k), e) - - return data - - -def make_func( - length: int, funcs: Recipe, global_state=None -) -> Callable[[int, Env], DataEntry]: - if global_state is None: - global_state = {} - - def f(length=length, global_state=global_state, *args, **kwargs): - data = {} - for k, f in funcs: - data[k] = resolve( - f, - data, - length=length, - field_name=k, - global_state=global_state, - *args, - **kwargs - ) - return data - - return f - - -def take_as_list(iterator, num): - return list(itertools.islice(iterator, num)) - - -class Debug: - def __init__(self, print_global=False) -> None: - self.print_global = print_global - - def __call__(self, x: Env, global_state, **kwargs): - print(x) - if self.print_global: - print(global_state) - return 0 - - -class Lifted: - def __add__(self, other): - return LiftedAdd(self, other) - - def __radd__(self, other): - return LiftedAdd(other, self) - - def __sub__(self, other): - return LiftedSub(self, other) - - def __rsub__(self, other): - return LiftedSub(other, self) - - def __mul__(self, other): - return LiftedMul(self, other, operator.mul) - - def __rmul__(self, other): - return LiftedMul(other, self, operator.mul) - - def __truediv__(self, other): - return LiftedTruediv(self, other, operator.truediv) - - def __rtruediv__(self, other): - return LiftedTruediv(other, self, operator.truediv) - - def __call__( - self, x: Env, length: int, field_name: str, global_state: Dict, *args, **kwargs - ): - pass - - -class LiftedBinaryOp(Lifted): - def __init__(self, left, right, op) -> None: - self.left = left - self.right = right - self.op = op - - def __call__(self, *args, **kwargs): - left = resolve(self.left, *args, **kwargs) - right = resolve(self.right, *args, **kwargs) - return self.op(left, right) - - -class LiftedAdd(LiftedBinaryOp): - def __init__(self, left, right) -> None: - super().__init__(left, right, operator.add) - - -class LiftedSub(LiftedBinaryOp): - def __init__(self, left, right) -> None: - super().__init__(left, 
right, operator.sub) - - -class LiftedMul(LiftedBinaryOp): - def __init__(self, left, right) -> None: - super().__init__(left, right, operator.mul) - - -class LiftedTruediv(LiftedBinaryOp): - def __init__(self, left, right) -> None: - super().__init__(left, right, operator.truediv) - - -class RandomGaussian(Lifted): - def __init__( - self, stddev: ValueOrCallable = 1.0, shape: Sequence[int] = (0,) - ) -> None: - self.stddev = stddev - self.shape = shape - - def __call__(self, x: Env, length: int, *args, **kwargs): - stddev = resolve(self.stddev, x, length, *args, **kwargs) - s = np.array(self.shape) - s[s == 0] = length - return stddev * np.random.randn(*s) - - -# Binary recipe that returns 1 if date is in holidays list and 0 otherwise -class BinaryHolidays(Lifted): - # TODO: holidays is type List[datetime.date] - def __init__(self, dates: List[pd.Timestamp], holidays: List[Any]) -> None: - self.dates = dates - self.holidays = holidays - - def __call__(self, *args, **kwargs): - length = len(self.dates) - out = np.ones(length) - for i, date in enumerate(self.dates): - # Convert to string to check if inside of holidays datatime.date - if date.date() in self.holidays: - out[i] = 1.0 - else: - out[i] = 0.0 - return out - - -class RandomBinary(Lifted): - def __init__(self, prob: ValueOrCallable = 0.1) -> None: - self.prob = prob - - def __call__(self, x: Env, length: int, *args, **kwargs): - prob = resolve(self.prob, x, length, *args, **kwargs) - return 1.0 * (np.random.rand(length) < prob) - - -class RandomSymmetricDirichlet(Lifted): - def __init__( - self, alpha: ValueOrCallable = 1.0, shape: Sequence[int] = (0,) - ) -> None: - self.alpha = alpha - self.shape = shape - - def __call__(self, x, length, *args, **kwargs): - alpha = resolve(self.alpha, x, length, *args, **kwargs) - s = np.array(self.shape) - s[s == 0] = length - return np.random.dirichlet(alpha * np.ones(s)) - - -class BinaryMarkovChain(Lifted): - def __init__( - self, one_to_zero: ValueOrCallable, 
zero_to_one: ValueOrCallable - ) -> None: - self.one_to_zero = one_to_zero - self.zero_to_one = zero_to_one - - def __call__(self, x: Env, length: int, *args, **kwargs): - probs = np.zeros(2) - probs[0] = resolve(self.zero_to_one, x, length, *args, **kwargs) - probs[1] = resolve(self.one_to_zero, x, length, *args, **kwargs) - out = np.ones(length, dtype=np.int) # initial state is 1 - uu = np.random.rand(length) - for i in range(1, length): - if uu[i] < probs[out[i - 1]]: - out[i] = 1 - out[i - 1] - else: - out[i] = out[i - 1] - return out - - -class Constant(Lifted): - def __init__(self, constant) -> None: - self.constant = constant - - def __call__(self, *args, **kwargs): - return self.constant - - -class ConstantVec(Lifted): - def __init__(self, constant: ValueOrCallable) -> None: - self.constant = constant - - def __call__(self, x: Env, length: int, *args, **kwargs): - constant = resolve(self.constant, x, length, *args, **kwargs) - return constant * np.ones(length) - - -class NormalizeMax(Lifted): - def __init__(self, input) -> None: - self.input = input - - def __call__(self, x: Env, *args, **kwargs): - inp = resolve(self.input, x, *args, kwargs) - return inp / np.max(inp) - - -class OnesLike(Lifted): - def __init__(self, other) -> None: - self.other = other - - def __call__(self, x, length, *args, **kwargs): - other = resolve(self.other, x, length, **kwargs) - return np.ones_like(other) - - -class LinearTrend(Lifted): - def __init__(self, slope: ValueOrCallable = 1.0) -> None: - self.slope = slope - - def __call__(self, x, length, *args, **kwargs): - slope = resolve(self.slope, x, length, *args, **kwargs) - return slope * np.arange(length) / length - - -class RandomCat: - def __init__( - self, - cardinalities: List[int], - prob_fun: Callable = RandomSymmetricDirichlet(alpha=1.0, shape=(0,)), - ) -> None: - self.cardinalities = cardinalities - self.prob_fun = prob_fun - - def __call__(self, x, field_name, global_state, **kwargs): - if field_name not in 
global_state: - probs = [self.prob_fun(x, length=c) for c in self.cardinalities] - global_state[field_name] = probs - probs = global_state[field_name] - cats = np.array( - [ - np.random.choice(np.arange(len(probs[i])), p=probs[i]) - for i in range(len(probs)) - ] - ) - return cats - - -class Lag(Lifted): - def __init__( - self, input: ValueOrCallable, lag: ValueOrCallable = 0, pad_const: int = 0, - ) -> None: - self.input = input - self.lag = lag - self.pad_const = pad_const - - def __call__(self, x, *args, **kwargs): - feat = resolve(self.input, x, *args, **kwargs) - lag = resolve(self.lag, x, *args, **kwargs) - - if lag > 0: - lagged_feat = np.concatenate((self.pad_const * np.ones(lag), feat[:-lag])) - elif lag < 0: - lagged_feat = np.concatenate((feat[-lag:], self.pad_const * np.ones(-lag))) - - else: - lagged_feat = feat - return lagged_feat - - -class ForEachCat(Lifted): - def __init__(self, fun, cat_field="cat", cat_idx=0) -> None: - self.fun = fun - self.cat_field = cat_field - self.cat_idx = cat_idx - - def __call__( - self, x: Env, length: int, field_name: str, global_state: Dict, *args, **kwargs - ): - c = x[self.cat_field][self.cat_idx] - if field_name not in global_state: - global_state[field_name] = np.empty( - len(global_state[self.cat_field][self.cat_idx]), dtype=np.object, - ) - if global_state[field_name][c] is None: - global_state[field_name][c] = self.fun( - x, length=length, field_name=field_name, *args, **kwargs - ) - return global_state[field_name][c] - - -class Eval(Lifted): - def __init__(self, expr: str) -> None: - self.expr = expr - - def __call__(self, x: Env, length: int, *args, **kwargs): - return eval(self.expr, globals(), dict(x=x, length=length, **kwargs)) - - -class SmoothSeasonality(Lifted): - def __init__(self, period: ValueOrCallable, phase: ValueOrCallable) -> None: - self.period = period - self.phase = phase - - def __call__(self, x: Env, length: int, *args, **kwargs): - period = resolve(self.period, x, length, *args, **kwargs) 
- phase = resolve(self.phase, x, length, *args, **kwargs) - return (np.sin(2.0 / period * np.pi * (np.arange(length) + phase)) + 1) / 2.0 - - -class Add(Lifted): - def __init__(self, inputs: List[ValueOrCallable]) -> None: - self.inputs = inputs - - def __call__(self, x: Env, length: int, *args, **kwargs): - return sum([resolve(k, x, length, *args, **kwargs) for k in self.inputs]) - - -class Mul(Lifted): - def __init__(self, inputs) -> None: - self.inputs = inputs - - def __call__(self, x: Env, length: int, *args, **kwargs): - return functools.reduce( - operator.mul, [resolve(k, x, length, *args, **kwargs) for k in self.inputs], - ) - - -class NanWhere(Lifted): - def __init__(self, source: ValueOrCallable, nan_indicator: ValueOrCallable) -> None: - self.source = source - self.nan_indicator = nan_indicator - - def __call__(self, x: Env, length: int, *args, **kwargs): - source = resolve(self.source, x, length, *args, **kwargs) - nan_indicator = resolve(self.nan_indicator, x, length, *args, **kwargs) - out = source.copy() - out[nan_indicator == 1] = np.nan - return out - - -class OneMinus(Lifted): - def __init__(self, source: ValueOrCallable) -> None: - self.source = source - - def __call__(self, x: Env, length: int, *args, **kwargs): - value = resolve(self.source, x, length, *args, **kwargs) - return 1 - value - - -class Concatenate(Lifted): - def __init__(self, inputs: List[ValueOrCallable], axis: int = 0) -> None: - self.inputs = inputs - self.axis = axis - - def __call__(self, x: Env, length: int, *args, **kwargs): - inputs = [resolve(z, x, length, **kwargs) for z in self.inputs] - return np.concatenate(inputs, self.axis) - - -class Stack(Lifted): - def __init__(self, inputs: List[ValueOrCallable]) -> None: - self.inputs = inputs - - def __call__(self, x: Env, length: int, *args, **kwargs): - inputs = [resolve(z, x, length, **kwargs) for z in self.inputs] - return np.stack(inputs, axis=0) - - -class StackPrefix(Lifted): - def __init__(self, prefix: str) -> None: - 
self.prefix = prefix - - def __call__(self, x: Env, length: int, *args, **kwargs): - inputs = [v for k, v in x.items() if k.startswith(self.prefix)] - return np.stack(inputs, axis=0) - - -class Ref(Lifted): - def __init__(self, field_name: str) -> None: - self.field_name = field_name - - def __call__(self, x: Env, length: int, *args, **kwargs): - return x[self.field_name] - - -class RandomUniform(Lifted): - def __init__( - self, low: ValueOrCallable = 0.0, high: ValueOrCallable = 1.0, shape=(0,), - ) -> None: - self.low = low - self.high = high - self.shape = shape - - def __call__(self, x: Env, length: int, *args, **kwargs): - low = resolve(self.low, x, length, *args, **kwargs) - high = resolve(self.high, x, length, *args, **kwargs) - s = np.array(self.shape) - s[s == 0] = length - return np.random.uniform(low, high, s) - - -class RandomInteger(Lifted): - def __init__( - self, - low: ValueOrCallable, - high: ValueOrCallable, - shape: Optional[Sequence[int]] = (0,), - ) -> None: - self.low = low - self.high = high - self.shape = shape - - def __call__(self, x: Env, length: int, *args, **kwargs): - low = resolve(self.low, x, length, *args, **kwargs) - high = resolve(self.high, x, length, *args, **kwargs) - if self.shape is not None: - s = np.array(self.shape) - s[s == 0] = length - return np.random.randint(low, high, s) - else: - return np.random.randint(low, high) - - -class RandomChangepoints(Lifted): - def __init__(self, max_num_changepoints: ValueOrCallable) -> None: - self.max_num_changepoints = max_num_changepoints - - def __call__(self, x: Env, length: int, *args, **kwargs): - max_num_changepoints = resolve( - self.max_num_changepoints, x, length, *args, **kwargs - ) - num_changepoints = np.random.randint(0, max_num_changepoints + 1) - change_idx = np.sort( - np.random.randint(low=1, high=length - 1, size=(num_changepoints,)) - ) - change_ranges = np.concatenate([change_idx, [length]]) - out = np.zeros(length, dtype=np.int) - for i in range(0, 
num_changepoints): - out[change_ranges[i] : change_ranges[i + 1]] = i + 1 - return out - - -class Repeated(Lifted): - def __init__(self, pattern: ValueOrCallable) -> None: - self.pattern = pattern - - def __call__(self, x: Env, length: int, *args, **kwargs): - pattern = resolve(self.pattern, x, length, **kwargs) - repeats = length // len(pattern) + 1 - out = np.tile(pattern, (repeats,)) - return out[:length] - - -class Convolve(Lifted): - def __init__(self, input: ValueOrCallable, filter: ValueOrCallable) -> None: - self.filter = filter - self.input = input - - def __call__(self, x: Env, length: int, *args, **kwargs): - fil = resolve(self.filter, x, length, **kwargs) - inp = resolve(self.input, x, length, **kwargs) - out = np.convolve(inp, fil, mode="same") - return out - - -class Dilated(Lifted): - def __init__(self, source: Callable, dilation: int) -> None: - self.source = source - self.dilation = dilation - - def __call__(self, x: Env, length: int, *args, **kwargs): - inner = self.source(x, length // self.dilation + 1, **kwargs) - out = np.repeat(inner, self.dilation) - return out[:length] - - -class Choose(Lifted): - def __init__(self, options: ValueOrCallable, selector: ValueOrCallable) -> None: - self.options = options - self.selector = selector - - def __call__(self, x, length, **kwargs): - options = resolve(self.options, x, length, **kwargs) - selector = resolve(self.selector, x, length, **kwargs) - e = np.eye(options.shape[0]) - out = np.sum(e[selector] * options.T, axis=1) - return out - - -class EvalRecipe(Lifted): - def __init__(self, recipe: Recipe, op: ValueOrCallable) -> None: - self.recipe = recipe - self.op = op - - def __call__(self, x: Env, *args, **kwargs): - xx = evaluate(self.recipe, *args, **kwargs) - return resolve(self.op, xx, *args, **kwargs) diff --git a/pts/dataset/repository/__init__.py b/pts/dataset/repository/__init__.py index 0bd39eb..b74b6ee 100644 --- a/pts/dataset/repository/__init__.py +++ b/pts/dataset/repository/__init__.py @@ 
-1,14 +1 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -from .datasets import get_dataset, dataset_recipes \ No newline at end of file +from .datasets import dataset_recipes \ No newline at end of file diff --git a/pts/dataset/repository/_artificial.py b/pts/dataset/repository/_artificial.py deleted file mode 100644 index 8b5fcda..0000000 --- a/pts/dataset/repository/_artificial.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- -# Standard library imports -import json -from pathlib import Path - -# First-party imports -from pts.dataset import ArtificialDataset, generate_sf2, serialize_data_entry - - -def generate_artificial_dataset(dataset_path: Path, dataset: ArtificialDataset) -> None: - dataset_path_train = dataset_path / "train" - dataset_path_test = dataset_path / "test" - - dataset_path.mkdir(exist_ok=True) - dataset_path_train.mkdir(exist_ok=False) - dataset_path_test.mkdir(exist_ok=False) - - ds = dataset.generate() - assert ds.test is not None - - with (dataset_path / "metadata.json").open("w") as fp: - json.dump(ds.metadata.dict(), fp, indent=2, sort_keys=True) - - generate_sf2( - filename=str(dataset_path_train / "train.json"), - time_series=list(map(serialize_data_entry, ds.train)), - is_missing=False, - num_missing=0, - ) - - generate_sf2( - filename=str(dataset_path_test / "test.json"), - time_series=list(map(serialize_data_entry, ds.test)), - is_missing=False, - num_missing=0, - ) diff --git a/pts/dataset/repository/_gp_copula_2019.py b/pts/dataset/repository/_gp_copula_2019.py deleted file mode 100644 index 591f3b9..0000000 --- a/pts/dataset/repository/_gp_copula_2019.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -""" -Loads the datasets used in Salinas et al. 2019 (https://tinyurl.com/woyhhqy). -This wrapper downloads and unpacks them so they don'thave to be attached as -large files in GluonTS master. 
-""" -import json -import os -import shutil -import tarfile -from pathlib import Path -from typing import NamedTuple, Optional -from urllib import request - -from pts.dataset import FileDataset, FieldName -from ._util import save_to_file, to_dict, metadata - - -class GPCopulaDataset(NamedTuple): - name: str - url: str - num_series: int - prediction_length: int - freq: str - rolling_evaluations: int - max_target_dim: Optional[int] = None - - -root = ( - "https://raw.githubusercontent.com/mbohlkeschneider/gluon-ts/mv_release/datasets/" -) - -datasets_info = { - "exchange_rate_nips": GPCopulaDataset( - name="exchange_rate_nips", - url=root + "exchange_rate_nips.tar.gz", - num_series=8, - prediction_length=30, - freq="B", - rolling_evaluations=5, - max_target_dim=None, - ), - "electricity_nips": GPCopulaDataset( - name="electricity_nips", - url=root + "electricity_nips.tar.gz", - # original dataset can be found at https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014# - num_series=370, - prediction_length=24, - freq="H", - rolling_evaluations=7, - max_target_dim=None, - ), - "traffic_nips": GPCopulaDataset( - name="traffic_nips", - url=root + "traffic_nips.tar.gz", - # note there are 963 in the original dataset from https://archive.ics.uci.edu/ml/datasets/PEMS-SF - num_series=963, - prediction_length=24, - freq="H", - rolling_evaluations=7, - max_target_dim=None, - ), - "solar_nips": GPCopulaDataset( - name="solar-energy", - url=root + "solar_nips.tar.gz", - num_series=137, - prediction_length=24, - freq="H", - rolling_evaluations=7, - max_target_dim=None, - ), - "wiki-rolling_nips": GPCopulaDataset( - name="wiki-rolling_nips", - # That file lives on GitHub Large file storage (lfs). We need to use - # the exact link, otherwise it will only open the lfs pointer file. 
- url="https://github.com/mbohlkeschneider/gluon-ts/raw/650ad5ffe92d20e89d491966b6d8b4459e219be8/datasets/wiki-rolling_nips.tar.gz", - num_series=9535, - prediction_length=30, - freq="D", - rolling_evaluations=5, - max_target_dim=2000, - ), - "taxi_30min": GPCopulaDataset( - name="taxi_30min", - url=root + "taxi_30min.tar.gz", - num_series=1214, - prediction_length=24, - freq="30min", - rolling_evaluations=56, - max_target_dim=None, - ), -} - - -def generate_gp_copula_dataset(dataset_path: Path, dataset_name: str): - ds_info = datasets_info[dataset_name] - os.makedirs(dataset_path, exist_ok=True) - - download_dataset(dataset_path.parent, ds_info) - save_metadata(dataset_path, ds_info) - save_dataset(dataset_path / "train", ds_info) - save_dataset(dataset_path / "test", ds_info) - clean_up_dataset(dataset_path, ds_info) - - -def download_dataset(dataset_path: Path, ds_info: GPCopulaDataset): - request.urlretrieve(ds_info.url, dataset_path / f"{ds_info.name}.tar.gz") - - with tarfile.open(dataset_path / f"{ds_info.name}.tar.gz") as tar: - tar.extractall(path=dataset_path) - - -def save_metadata(dataset_path: Path, ds_info: GPCopulaDataset): - with open(dataset_path / "metadata.json", "w") as f: - f.write( - json.dumps( - metadata( - cardinality=ds_info.num_series, - freq=ds_info.freq, - prediction_length=ds_info.prediction_length, - ) - ) - ) - - -def save_dataset(dataset_path: Path, ds_info: GPCopulaDataset): - dataset = list(FileDataset(dataset_path / "*.json", freq=ds_info.freq)) - shutil.rmtree(dataset_path) - train_file = dataset_path / "data.json" - save_to_file( - train_file, - [ - to_dict( - target_values=data_entry[FieldName.TARGET], - start=data_entry[FieldName.START], - # Handles adding categorical features of rolling - # evaluation dates - cat=[cat - ds_info.num_series * (cat // ds_info.num_series)], - item_id=cat, - ) - for cat, data_entry in enumerate(dataset) - ], - ) - - -def clean_up_dataset(dataset_path: Path, ds_info: GPCopulaDataset): - 
os.remove(dataset_path.parent / f"{ds_info.name}.tar.gz") - shutil.rmtree(dataset_path / "metadata") diff --git a/pts/dataset/repository/_lstnet.py b/pts/dataset/repository/_lstnet.py deleted file mode 100644 index ade1708..0000000 --- a/pts/dataset/repository/_lstnet.py +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -""" -Here we reuse the datasets used by LSTNet as the processed url of the datasets -are available on GitHub. -""" -import json -import os -from pathlib import Path -from typing import List, NamedTuple, Optional - -import pandas as pd - -from pts.dataset import frequency_add -from ._util import save_to_file, to_dict, metadata - - -def load_from_pandas( - df: pd.DataFrame, time_index: pd.DatetimeIndex, agg_freq: Optional[str] = None, -) -> List[pd.Series]: - df = df.set_index(time_index) - - pivot_df = df.transpose() - pivot_df.head() - - timeseries = [] - for row in pivot_df.iterrows(): - ts = pd.Series(row[1].values, index=time_index) - if agg_freq is not None: - ts = ts.resample(agg_freq).sum() - first_valid = ts[ts.notnull()].index[0] - last_valid = ts[ts.notnull()].index[-1] - ts = ts[first_valid:last_valid] - - timeseries.append(ts) - - return timeseries - - -class LstnetDataset(NamedTuple): - name: str - url: str - num_series: int - num_time_steps: int - prediction_length: int - rolling_evaluations: int - freq: str - start_date: str - agg_freq: Optional[str] = None - - -root = ( - 
"https://raw.githubusercontent.com/laiguokun/multivariate-time-series-data/master/" -) - -datasets_info = { - "exchange_rate": LstnetDataset( - name="exchange_rate", - url=root + "exchange_rate/exchange_rate.txt.gz", - num_series=8, - num_time_steps=7588, - prediction_length=30, - rolling_evaluations=5, - start_date="1990-01-01", - freq="1B", - agg_freq=None, - ), - "electricity": LstnetDataset( - name="electricity", - url=root + "electricity/electricity.txt.gz", - # original dataset can be found at https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014# - # the aggregated ones that is used from LSTNet filters out from the initial 370 series the one with no data - # in 2011 - num_series=321, - num_time_steps=26304, - prediction_length=24, - rolling_evaluations=7, - start_date="2012-01-01", - freq="1H", - agg_freq=None, - ), - "traffic": LstnetDataset( - name="traffic", - url=root + "traffic/traffic.txt.gz", - # note there are 963 in the original dataset from https://archive.ics.uci.edu/ml/datasets/PEMS-SF - # but only 862 in LSTNet - num_series=862, - num_time_steps=17544, - prediction_length=24, - rolling_evaluations=7, - start_date="2015-01-01", - freq="H", - agg_freq=None, - ), - "solar-energy": LstnetDataset( - name="solar-energy", - url=root + "solar-energy/solar_AL.txt.gz", - num_series=137, - num_time_steps=52560, - prediction_length=24, - rolling_evaluations=7, - start_date="2006-01-01", - freq="10min", - agg_freq="1H", - ), -} - - -def generate_lstnet_dataset(dataset_path: Path, dataset_name: str): - ds_info = datasets_info[dataset_name] - - os.makedirs(dataset_path, exist_ok=True) - - with open(dataset_path / "metadata.json", "w") as f: - f.write( - json.dumps( - metadata( - cardinality=ds_info.num_series, - freq=ds_info.freq, - prediction_length=ds_info.prediction_length, - ) - ) - ) - - train_file = dataset_path / "train" / "data.json" - test_file = dataset_path / "test" / "data.json" - - time_index = pd.date_range( - 
start=ds_info.start_date, freq=ds_info.freq, periods=ds_info.num_time_steps, - ) - - df = pd.read_csv(ds_info.url, header=None) - - assert df.shape == ( - ds_info.num_time_steps, - ds_info.num_series, - ), f"expected num_time_steps/num_series {(ds_info.num_time_steps, ds_info.num_series)} but got {df.shape}" - - timeseries = load_from_pandas( - df=df, time_index=time_index, agg_freq=ds_info.agg_freq - ) - - # the last date seen during training - ts_index = timeseries[0].index - training_end = ts_index[int(len(ts_index) * (8 / 10))] - - train_ts = [] - for cat, ts in enumerate(timeseries): - sliced_ts = ts[:training_end] - if len(sliced_ts) > 0: - train_ts.append( - to_dict( - target_values=sliced_ts.values, - start=sliced_ts.index[0], - cat=[cat], - item_id=cat, - ) - ) - - assert len(train_ts) == ds_info.num_series - - save_to_file(train_file, train_ts) - - # time of the first prediction - prediction_dates = [ - frequency_add(training_end, i * ds_info.prediction_length) - for i in range(ds_info.rolling_evaluations) - ] - - test_ts = [] - for prediction_start_date in prediction_dates: - for cat, ts in enumerate(timeseries): - # print(prediction_start_date) - prediction_end_date = frequency_add( - prediction_start_date, ds_info.prediction_length - ) - sliced_ts = ts[:prediction_end_date] - test_ts.append( - to_dict( - target_values=sliced_ts.values, start=sliced_ts.index[0], cat=[cat], - ) - ) - - assert len(test_ts) == ds_info.num_series * ds_info.rolling_evaluations - - save_to_file(test_file, test_ts) diff --git a/pts/dataset/repository/_m4.py b/pts/dataset/repository/_m4.py deleted file mode 100644 index 6fce8ad..0000000 --- a/pts/dataset/repository/_m4.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. 
-# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -import json -import os -from pathlib import Path - -import numpy as np -import pandas as pd - -from ._util import save_to_file, to_dict, metadata - - -def generate_m4_dataset( - dataset_path: Path, m4_freq: str, pandas_freq: str, prediction_length: int -): - m4_dataset_url = "https://github.com/M4Competition/M4-methods/raw/master/Dataset" - train_df = pd.read_csv(f"{m4_dataset_url}/Train/{m4_freq}-train.csv", index_col=0) - test_df = pd.read_csv(f"{m4_dataset_url}/Test/{m4_freq}-test.csv", index_col=0) - - os.makedirs(dataset_path, exist_ok=True) - - with open(dataset_path / "metadata.json", "w") as f: - f.write( - json.dumps( - metadata( - cardinality=len(train_df), - freq=pandas_freq, - prediction_length=prediction_length, - ) - ) - ) - - train_file = dataset_path / "train" / "data.json" - test_file = dataset_path / "test" / "data.json" - - train_target_values = [ts[~np.isnan(ts)] for ts in train_df.values] - - test_target_values = [ - np.hstack([train_ts, test_ts]) - for train_ts, test_ts in zip(train_target_values, test_df.values) - ] - - if m4_freq == "Yearly": - # some time series have more than 300 years which can not be represented in pandas, - # this is probably due to a misclassification of those time series as Yearly - # we simply use only the last 300 years for training - # note this does not affect test time as prediction length is less than 300 years - train_target_values = [ts[-300:] for ts in train_target_values] - test_target_values = [ts[-300:] for ts in test_target_values] - - # the original dataset did not include time stamps, so we use a mock start date for each time series - # 
we use the earliest point available in pandas - mock_start_dataset = "1750-01-01 00:00:00" - - save_to_file( - train_file, - [ - to_dict( - target_values=target, start=mock_start_dataset, cat=[cat], item_id=cat - ) - for cat, target in enumerate(train_target_values) - ], - ) - - save_to_file( - test_file, - [ - to_dict( - target_values=target, start=mock_start_dataset, cat=[cat], item_id=cat - ) - for cat, target in enumerate(test_target_values) - ], - ) - diff --git a/pts/dataset/repository/_m5.py b/pts/dataset/repository/_m5.py index aa25826..cd70b0b 100644 --- a/pts/dataset/repository/_m5.py +++ b/pts/dataset/repository/_m5.py @@ -6,12 +6,13 @@ from functools import lru_cache import numpy as np import pandas as pd -from pts.dataset import FieldName -from pts.feature import CustomDateFeatureSet, squared_exponential_kernel -from ._util import metadata, save_to_file +from gluonts.dataset.field_names import FieldName +from gluonts.dataset.repository._util import metadata, save_to_file +from gluonts.time_feature.holiday import squared_exponential_kernel +from pts.feature import CustomDateFeatureSet -def generate_m5_dataset( +def generate_pts_m5_dataset( dataset_path: Path, pandas_freq: str, prediction_length: int = 28, @@ -46,7 +47,7 @@ def generate_m5_dataset( ) sales_train_evaluation.sort_index(inplace=True) - sell_prices = pd.read_csv(sell_prices_path, index_col=['item_id', 'store_id']) + sell_prices = pd.read_csv(sell_prices_path, index_col=["item_id", "store_id"]) sell_prices.sort_index(inplace=True) @lru_cache(maxsize=None) @@ -161,16 +162,22 @@ def generate_m5_dataset( "WI": snap_WI_feature, }[state_id] - time_series["target"] = item.iloc[start_index:1913].values.astype(np.float32).tolist() - time_series["feat_dynamic_real"] = np.concatenate( - ( - np.expand_dims(sell_price.iloc[start_index:1913].values, 0), - event_1_feature[:, start_index:1913], - event_2_feature[:, start_index:1913], - snap_feature[:, start_index:1913], - ), - 0, - 
).astype(np.float32).tolist() + time_series["target"] = ( + item.iloc[start_index:1913].values.astype(np.float32).tolist() + ) + time_series["feat_dynamic_real"] = ( + np.concatenate( + ( + np.expand_dims(sell_price.iloc[start_index:1913].values, 0), + event_1_feature[:, start_index:1913], + event_2_feature[:, start_index:1913], + snap_feature[:, start_index:1913], + ), + 0, + ) + .astype(np.float32) + .tolist() + ) train_ds.append(time_series.copy()) @@ -222,16 +229,22 @@ def generate_m5_dataset( "WI": snap_WI_feature, }[state_id] - time_series["target"] = item.iloc[start_index:1941].values.astype(np.float32).tolist() - time_series["feat_dynamic_real"] = np.concatenate( - ( - np.expand_dims(sell_price.iloc[start_index:1941].values, 0), - event_1_feature[:, start_index:1941], - event_2_feature[:, start_index:1941], - snap_feature[:, start_index:1941], - ), - 0, - ).astype(np.float32).tolist() + time_series["target"] = ( + item.iloc[start_index:1941].values.astype(np.float32).tolist() + ) + time_series["feat_dynamic_real"] = ( + np.concatenate( + ( + np.expand_dims(sell_price.iloc[start_index:1941].values, 0), + event_1_feature[:, start_index:1941], + event_2_feature[:, start_index:1941], + snap_feature[:, start_index:1941], + ), + 0, + ) + .astype(np.float32) + .tolist() + ) test_ds.append(time_series.copy()) diff --git a/pts/dataset/repository/_util.py b/pts/dataset/repository/_util.py deleted file mode 100644 index df39bde..0000000 --- a/pts/dataset/repository/_util.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. 
This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -import json -import os -from pathlib import Path -from typing import Dict, List, Optional, Any - -import numpy as np - - -def to_dict( - target_values: np.ndarray, - start: str, - cat: Optional[List[int]] = None, - item_id: Optional[Any] = None, -): - def serialize(x): - if np.isnan(x): - return "NaN" - else: - # return x - return float("{0:.6f}".format(float(x))) - - res = { - "start": str(start), - "target": [serialize(x) for x in target_values], - } - - if cat is not None: - res["feat_static_cat"] = cat - - if item_id is not None: - res["item_id"] = item_id - - return res - - -def save_to_file(path: Path, data: List[Dict]): - print(f"saving time-series into {path}") - path_dir = os.path.dirname(path) - os.makedirs(path_dir, exist_ok=True) - with open(path, "wb") as fp: - for d in data: - fp.write(json.dumps(d).encode("utf-8")) - fp.write("\n".encode("utf-8")) - - -def get_download_path() -> Path: - """ - - Returns - ------- - Path - default path to download datasets - /home/username/.pytorch/pytorch-ts/ - """ - return Path(str(Path.home() / ".pytorch" / "pytorch-ts")) - - -def metadata(cardinality: int, freq: str, prediction_length: int): - return { - "freq": freq, - "prediction_length": prediction_length, - "feat_static_cat": [ - {"name": "feat_static_cat", "cardinality": str(cardinality)} - ], - } diff --git a/pts/dataset/repository/datasets.py b/pts/dataset/repository/datasets.py index be6105a..17cd3da 100644 --- a/pts/dataset/repository/datasets.py +++ b/pts/dataset/repository/datasets.py @@ -1,183 +1,9 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. 
-# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -import logging -from collections import OrderedDict from functools import partial -from pathlib import Path -from pts.dataset import ConstantDataset, TrainDatasets, load_datasets -from ._artificial import generate_artificial_dataset -from ._gp_copula_2019 import generate_gp_copula_dataset -from ._lstnet import generate_lstnet_dataset -from ._m4 import generate_m4_dataset -from ._m5 import generate_m5_dataset -from ._util import get_download_path +from gluonts.dataset.repository.datasets import dataset_recipes -m4_freq = "Hourly" -pandas_freq = "H" -dataset_path = Path(f"m4-{m4_freq}") -prediction_length = 48 +from ._m5 import generate_pts_m5_dataset -dataset_recipes = OrderedDict( - { - # each recipe generates a dataset given a path - "constant": partial(generate_artificial_dataset, dataset=ConstantDataset()), - "exchange_rate": partial(generate_lstnet_dataset, dataset_name="exchange_rate"), - "solar-energy": partial(generate_lstnet_dataset, dataset_name="solar-energy"), - "electricity": partial(generate_lstnet_dataset, dataset_name="electricity"), - "traffic": partial(generate_lstnet_dataset, dataset_name="traffic"), - "exchange_rate_nips": partial( - generate_gp_copula_dataset, dataset_name="exchange_rate_nips" - ), - "electricity_nips": partial( - generate_gp_copula_dataset, dataset_name="electricity_nips" - ), - "traffic_nips": partial( - generate_gp_copula_dataset, dataset_name="traffic_nips" - ), - "solar_nips": partial(generate_gp_copula_dataset, dataset_name="solar_nips"), - "wiki-rolling_nips": partial( - generate_gp_copula_dataset, dataset_name="wiki-rolling_nips" - ), - "taxi_30min": 
partial(generate_gp_copula_dataset, dataset_name="taxi_30min"), - "m4_hourly": partial( - generate_m4_dataset, - m4_freq="Hourly", - pandas_freq="H", - prediction_length=48, - ), - "m4_daily": partial( - generate_m4_dataset, m4_freq="Daily", pandas_freq="D", prediction_length=14, - ), - "m4_weekly": partial( - generate_m4_dataset, - m4_freq="Weekly", - pandas_freq="W", - prediction_length=13, - ), - "m4_monthly": partial( - generate_m4_dataset, - m4_freq="Monthly", - pandas_freq="M", - prediction_length=18, - ), - "m4_quarterly": partial( - generate_m4_dataset, - m4_freq="Quarterly", - pandas_freq="3M", - prediction_length=8, - ), - "m4_yearly": partial( - generate_m4_dataset, - m4_freq="Yearly", - pandas_freq="12M", - prediction_length=6, - ), - "m5": partial( - generate_m5_dataset, pandas_freq="D", prediction_length=28, alpha=0.5 - ), - } +dataset_recipes["pts_m5"] = partial( + generate_pts_m5_dataset, pandas_freq="D", prediction_length=28 ) - -dataset_names = list(dataset_recipes.keys()) - -default_dataset_path = get_download_path() / "datasets" - - -def materialize_dataset( - dataset_name: str, path: Path = default_dataset_path, regenerate: bool = False, -) -> Path: - """ - Ensures that the dataset is materialized under the `path / dataset_name` - path. - - Parameters - ---------- - dataset_name - name of the dataset, for instance "m4_hourly" - regenerate - whether to regenerate the dataset even if a local file is present. - If this flag is False and the file is present, the dataset will not - be downloaded again. - path - where the dataset should be saved - Returns - ------- - the path where the dataset is materialized - """ - assert dataset_name in dataset_recipes.keys(), ( - f"{dataset_name} is not present, please choose one from " - f"{dataset_recipes.keys()}." 
- ) - - path.mkdir(parents=True, exist_ok=True) - dataset_path = path / dataset_name - - dataset_recipe = dataset_recipes[dataset_name] - - if not dataset_path.exists() or regenerate: - logging.info(f"downloading and processing {dataset_name}") - dataset_recipe(dataset_path=dataset_path) - else: - logging.info(f"using dataset already processed in path {dataset_path}.") - - return dataset_path - - -def get_dataset( - dataset_name: str, - path: Path = default_dataset_path, - regenerate: bool = False, - shuffle: bool = True, -) -> TrainDatasets: - """ - Get a repository dataset. - - The datasets that can be obtained through this function have been used - with different processing over time by several papers (e.g., [SFG17]_, - [LCY+18]_, and [YRD15]_). - - Parameters - ---------- - dataset_name - name of the dataset, for instance "m4_hourly" - regenerate - whether to regenerate the dataset even if a local file is present. - If this flag is False and the file is present, the dataset will not - be downloaded again. - path - where the dataset should be saved - shuffle - wheather to shuffle the training time series - Returns - ------- - dataset obtained by either downloading or reloading from local file. - """ - dataset_path = materialize_dataset(dataset_name, path, regenerate) - - return load_datasets( - metadata=dataset_path / "metadata.json", - train=dataset_path / "train" / "*.json", - test=dataset_path / "test" / "*.json", - shuffle=shuffle, - ) - - -if __name__ == "__main__": - - for dataset in dataset_names: - print(f"generate {dataset}") - ds = get_dataset(dataset, regenerate=True) - print(ds.metadata) - print(sum(1 for _ in list(iter(ds.train)))) diff --git a/pts/dataset/stat.py b/pts/dataset/stat.py deleted file mode 100644 index 552e1a1..0000000 --- a/pts/dataset/stat.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). 
-# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - - -import math -from collections import defaultdict -from typing import Any, List, NamedTuple, Optional, Set - -import numpy as np -from tqdm import tqdm - -from pts.exception import assert_pts -from .common import FieldName - - -class ScaleHistogram: - """ - Scale histogram of a timeseries dataset - This counts the number of timeseries whose mean of absolute values is in - the `[base ** i, base ** (i+1)]` range for all possible `i`. - The number of entries with empty target is counted separately. - Parameters - ---------- - base - Log-width of the histogram's buckets. 
- bin_counts - empty_target_count - """ - - def __init__( - self, - base: float = 2.0, - bin_counts: Optional[dict] = None, - empty_target_count: int = 0, - ) -> None: - self._base = base - self.bin_counts = defaultdict(int, {} if bin_counts is None else bin_counts) - self.empty_target_count = empty_target_count - self.__init_args__ = dict( - base=self._base, - bin_counts=self.bin_counts, - empty_target_count=empty_target_count, - ) - - def bucket_index(self, target_values): - assert len(target_values) > 0 - scale = np.mean(np.abs(target_values)) - scale_bin = int(math.log(scale + 1.0, self._base)) - return scale_bin - - def add(self, target_values): - if len(target_values) > 0: - bucket = self.bucket_index(target_values) - self.bin_counts[bucket] = self.bin_counts[bucket] + 1 - else: - self.empty_target_count = self.empty_target_count + 1 - - def count(self, target): - if len(target) > 0: - return self.bin_counts[self.bucket_index(target)] - else: - return self.empty_target_count - - def __len__(self): - return self.empty_target_count + sum(self.bin_counts.values()) - - def __eq__(self, other): - return ( - isinstance(other, ScaleHistogram) - and self.bin_counts == other.bin_counts - and self.empty_target_count == other.empty_target_count - and self._base == other._base - ) - - def __str__(self): - string_repr = [ - "count of scales in {min}-{max}:{count}".format( - min=self._base ** base_index - 1, - max=self._base ** (base_index + 1) - 1, - count=count, - ) - for base_index, count in sorted(self.bin_counts.items(), key=lambda x: x[0]) - ] - return "\n".join(string_repr) - - -class DatasetStatistics(NamedTuple): - """ - A NamedTuple to store the statistics of a Dataset. 
- """ - - integer_dataset: bool - max_target: float - mean_abs_target: float - mean_target: float - mean_target_length: float - min_target: float - feat_static_real: List[Set[float]] - feat_static_cat: List[Set[int]] - num_feat_dynamic_real: Optional[int] - num_feat_dynamic_cat: Optional[int] - num_missing_values: int - num_time_observations: int - num_time_series: int - scale_histogram: ScaleHistogram - - # DO NOT override the __str__ method, since we rely that we can load - # DatasetStatistics again; i.e. stats == eval(str(stats)) - - def __eq__(self, other): - for x, y in zip(self._asdict().values(), other._asdict().values()): - if isinstance(x, float): - if abs(x - y) > abs(0.0001 * x): - return False - elif x != y: - return False - return True - - -# TODO: reorganize modules to avoid circular dependency -# TODO: and substitute Any with Dataset -def calculate_dataset_statistics(ts_dataset: Any) -> DatasetStatistics: - """ - Computes the statistics of a given Dataset. - - Parameters - ---------- - ts_dataset - Dataset of which to compute the statistics. - - Returns - ------- - DatasetStatistics - NamedTuple containing the statistics. 
- """ - num_time_observations = 0 - num_time_series = 0 - min_target = 1e20 - max_target = -1e20 - sum_target = 0.0 - sum_abs_target = 0.0 - integer_dataset = True - observed_feat_static_cat: Optional[List[Set[int]]] = None - observed_feat_static_real: Optional[List[Set[float]]] = None - num_feat_static_real: Optional[int] = None - num_feat_static_cat: Optional[int] = None - num_feat_dynamic_real: Optional[int] = None - num_feat_dynamic_cat: Optional[int] = None - num_missing_values = 0 - - scale_histogram = ScaleHistogram() - - with tqdm(enumerate(ts_dataset, start=1), total=len(ts_dataset)) as it: - for num_time_series, ts in it: - - # TARGET - target = ts[FieldName.TARGET] - observed_target = target[~np.isnan(target)] - num_observations = len(observed_target) - - if num_observations > 0: - # 'nan' is handled in observed_target definition - assert_pts( - np.all(np.isfinite(observed_target)), - "Target values have to be finite (e.g., not inf, -inf, " - "or None) and cannot exceed single precision floating " - "point range.", - ) - - num_time_observations += num_observations - min_target = float(min(min_target, observed_target.min())) - max_target = float(max(max_target, observed_target.max())) - num_missing_values += int(np.isnan(target).sum()) - sum_target += float(observed_target.sum()) - sum_abs_target += float(np.abs(observed_target).sum()) - integer_dataset = integer_dataset and bool( - np.all(np.mod(observed_target, 1) == 0) - ) - - scale_histogram.add(observed_target) # after checks for inf and None - - # FEAT_STATIC_CAT - feat_static_cat = ( - ts[FieldName.FEAT_STATIC_CAT] if FieldName.FEAT_STATIC_CAT in ts else [] - ) - - if num_feat_static_cat is None: - num_feat_static_cat = len(feat_static_cat) - observed_feat_static_cat = [set() for _ in range(num_feat_static_cat)] - - # needed to type check - assert num_feat_static_cat is not None - assert observed_feat_static_cat is not None - - assert_pts( - num_feat_static_cat == len(feat_static_cat), - "Not all 
feat_static_cat vectors have the same length {} != {}.", - num_feat_static_cat, - len(feat_static_cat), - ) - for i, c in enumerate(feat_static_cat): - observed_feat_static_cat[i].add(c) - - # FEAT_STATIC_REAL - feat_static_real = ( - ts[FieldName.FEAT_STATIC_REAL] - if FieldName.FEAT_STATIC_REAL in ts - else [] - ) - - if num_feat_static_real is None: - num_feat_static_real = len(feat_static_real) - observed_feat_static_real = [set() for _ in range(num_feat_static_real)] - - # needed to type check - assert num_feat_static_real is not None - assert observed_feat_static_real is not None - - assert_pts( - num_feat_static_real == len(feat_static_real), - "Not all feat_static_real vectors have the same length {} != {}.", - num_feat_static_real, - len(feat_static_real), - ) - for i, c in enumerate(feat_static_real): - observed_feat_static_real[i].add(c) - - # FEAT_DYNAMIC_CAT - feat_dynamic_cat = ( - ts[FieldName.FEAT_DYNAMIC_CAT] - if FieldName.FEAT_DYNAMIC_CAT in ts - else None - ) - - if feat_dynamic_cat is None: - # feat_dynamic_cat not found, check it was the first ts we encounter or - # that feat_dynamic_cat were seen before - assert_pts( - num_feat_dynamic_cat is None or num_feat_dynamic_cat == 0, - "feat_dynamic_cat was found for some instances but not others.", - ) - num_feat_dynamic_cat = 0 - else: - if num_feat_dynamic_cat is None: - # first num_feat_dynamic_cat found - num_feat_dynamic_cat = feat_dynamic_cat.shape[0] - else: - assert_pts( - num_feat_dynamic_cat == feat_dynamic_cat.shape[0], - "Found instances with different number of features in " - "feat_dynamic_cat, found one with {} and another with {}.", - num_feat_dynamic_cat, - feat_dynamic_cat.shape[0], - ) - - assert_pts( - np.all(np.isfinite(feat_dynamic_cat)), - "Features values have to be finite and cannot exceed single " - "precision floating point range.", - ) - num_feat_dynamic_cat_time_steps = feat_dynamic_cat.shape[1] - assert_pts( - num_feat_dynamic_cat_time_steps == len(target), - "Each 
feature in feat_dynamic_cat has to have the same length as " - "the target. Found an instance with feat_dynamic_cat of length {} " - "and a target of length {}.", - num_feat_dynamic_cat_time_steps, - len(target), - ) - - # FEAT_DYNAMIC_REAL - feat_dynamic_real = ( - ts[FieldName.FEAT_DYNAMIC_REAL] - if FieldName.FEAT_DYNAMIC_REAL in ts - else None - ) - - if feat_dynamic_real is None: - # feat_dynamic_real not found, check it was the first ts we encounter or - # that feat_dynamic_real were seen before - assert_pts( - num_feat_dynamic_real is None or num_feat_dynamic_real == 0, - "feat_dynamic_real was found for some instances but not others.", - ) - num_feat_dynamic_real = 0 - else: - if num_feat_dynamic_real is None: - # first num_feat_dynamic_real found - num_feat_dynamic_real = feat_dynamic_real.shape[0] - else: - assert_pts( - num_feat_dynamic_real == feat_dynamic_real.shape[0], - "Found instances with different number of features in " - "feat_dynamic_real, found one with {} and another with {}.", - num_feat_dynamic_real, - feat_dynamic_real.shape[0], - ) - - assert_pts( - np.all(np.isfinite(feat_dynamic_real)), - "Features values have to be finite and cannot exceed single " - "precision floating point range.", - ) - num_feat_dynamic_real_time_steps = feat_dynamic_real.shape[1] - assert_pts( - num_feat_dynamic_real_time_steps == len(target), - "Each feature in feat_dynamic_real has to have the same length as " - "the target. 
Found an instance with feat_dynamic_real of length {} " - "and a target of length {}.", - num_feat_dynamic_real_time_steps, - len(target), - ) - - assert_pts(num_time_series > 0, "Time series dataset is empty!") - assert_pts( - num_time_observations > 0, "Only empty time series found in the dataset!", - ) - - # note this require the above assumption to avoid a division by zero - # runtime error - mean_target_length = num_time_observations / num_time_series - - # note this require the above assumption to avoid a division by zero - # runtime error - mean_target = sum_target / num_time_observations - mean_abs_target = sum_abs_target / num_time_observations - - integer_dataset = integer_dataset and min_target >= 0.0 - - assert len(scale_histogram) == num_time_series - - return DatasetStatistics( - integer_dataset=integer_dataset, - max_target=max_target, - mean_abs_target=mean_abs_target, - mean_target=mean_target, - mean_target_length=mean_target_length, - min_target=min_target, - num_missing_values=num_missing_values, - feat_static_real=observed_feat_static_real if observed_feat_static_real else [], - feat_static_cat=observed_feat_static_cat if observed_feat_static_cat else [], - num_feat_dynamic_real=num_feat_dynamic_real, - num_feat_dynamic_cat=num_feat_dynamic_cat, - num_time_observations=num_time_observations, - num_time_series=num_time_series, - scale_histogram=scale_histogram, - ) diff --git a/pts/dataset/transformed_iterable_dataset.py b/pts/dataset/transformed_iterable_dataset.py deleted file mode 100644 index 3be87c6..0000000 --- a/pts/dataset/transformed_iterable_dataset.py +++ /dev/null @@ -1,47 +0,0 @@ -import itertools -from typing import Dict, Iterable, Iterator, Optional - -import numpy as np -import torch - -from pts.transform.transform import Transformation -from .common import DataEntry, Dataset - - -class TransformedIterableDataset(torch.utils.data.IterableDataset): - def __init__( - self, dataset: Dataset, is_train: bool, transform: Transformation 
- ) -> None: - super().__init__() - self.dataset = dataset - self.transform = transform - self.is_train = is_train - self._cur_iter: Optional[Iterator] = None - - def _iterate_forever(self, collection: Iterable[DataEntry]) -> Iterator[DataEntry]: - # iterate forever over the collection, the collection must be non empty - while True: - try: - first = next(iter(collection)) - except StopIteration: - raise Exception("empty dataset") - else: - for x in itertools.chain([first], collection): - yield x - - def __iter__(self) -> Iterator[Dict[str, np.ndarray]]: - if self._cur_iter is None: - self._cur_iter = self.transform( - self._iterate_forever(self.dataset), is_train=self.is_train - ) - assert self._cur_iter is not None - while True: - data_entry = next(self._cur_iter) - yield { - k: (v.astype(np.float32) if v.dtype.kind == "f" else v) - for k, v in data_entry.items() - if isinstance(v, np.ndarray) == True - } - - # def __len__(self) -> int: - # return len(self.dataset) diff --git a/pts/dataset/utils.py b/pts/dataset/utils.py deleted file mode 100644 index f7e30c0..0000000 --- a/pts/dataset/utils.py +++ /dev/null @@ -1,148 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- - -import shutil -from pathlib import Path - -import numpy as np -import pandas as pd -import rapidjson as json - -from .common import TrainDatasets, MetaData -from .file_dataset import FileDataset - - -def frequency_add(ts: pd.Timestamp, amount: int) -> pd.Timestamp: - return ts + ts.freq * amount - - -def forecast_start(entry): - return frequency_add(entry["start"], len(entry["target"])) - - -def to_pandas(instance: dict, freq: str = None) -> pd.Series: - """ - Transform a dictionary into a pandas.Series object, using its - "start" and "target" fields. - - Parameters - ---------- - instance - Dictionary containing the time series data. - freq - Frequency to use in the pandas.Series index. - - Returns - ------- - pandas.Series - Pandas time series object. - """ - target = instance["target"] - start = instance["start"] - if not freq: - freq = start.freqstr - index = pd.date_range(start=start, periods=len(target), freq=freq) - return pd.Series(target, index=index) - - -def load_datasets(metadata, train, test, shuffle: bool = False) -> TrainDatasets: - """ - Loads a dataset given metadata, train and test path. - Parameters - ---------- - metadata - Path to the metadata file - train - Path to the training dataset files. - test - Path to the test dataset files. - shuffle - Return shuffled train data. - Returns - ------- - TrainDatasets - An object collecting metadata, training data, test data. - """ - meta = MetaData.parse_file(metadata) - train_ds = FileDataset(train, meta.freq, shuffle=shuffle) - test_ds = FileDataset(test, meta.freq) if test else None - - return TrainDatasets(metadata=meta, train=train_ds, test=test_ds) - - -def save_datasets(dataset: TrainDatasets, path_str: str, overwrite=True) -> None: - """ - Saves an TrainDatasets object to a JSON Lines file. - - Parameters - ---------- - dataset - The training datasets. - path_str - Where to save the dataset. - overwrite - Whether to delete previous version in this folder. 
- """ - path = Path(path_str) - - if overwrite: - shutil.rmtree(path, ignore_errors=True) - - def dump_line(f, line): - f.write(json.dumps(line).encode("utf-8")) - f.write("\n".encode("utf-8")) - - (path / "metadata").mkdir(parents=True) - with open(path / "metadata/metadata.json", "wb") as f: - dump_line(f, dataset.metadata.dict()) - - (path / "train").mkdir(parents=True) - with open(path / "train/data.json", "wb") as f: - for entry in dataset.train: - dump_line(f, serialize_data_entry(entry)) - - if dataset.test is not None: - (path / "test").mkdir(parents=True) - with open(path / "test/data.json", "wb") as f: - for entry in dataset.test: - dump_line(f, serialize_data_entry(entry)) - - -def serialize_data_entry(data): - """ - Encode the numpy values in the a DataEntry dictionary into lists so the - dictionary can be JSON serialized. - - Parameters - ---------- - data - The dictionary to be transformed. - - Returns - ------- - Dict - The transformed dictionary, where all fields where transformed into - strings. 
- """ - - def serialize_field(field): - if isinstance(field, np.ndarray): - # circumvent https://github.com/micropython/micropython/issues/3511 - nan_ix = np.isnan(field) - field = field.astype(np.object_) - field[nan_ix] = "NaN" - return field.tolist() - return str(field) - - return {k: serialize_field(v) for k, v in data.items() if v is not None} diff --git a/pts/distributions/implicit_quantile.py b/pts/distributions/implicit_quantile.py index 5cff8f3..c950bdf 100644 --- a/pts/distributions/implicit_quantile.py +++ b/pts/distributions/implicit_quantile.py @@ -2,10 +2,15 @@ import torch from torch.distributions import Distribution, TransformedDistribution, AffineTransform - class ImplicitQuantile(Distribution): - - def __init__(self, implicit_quantile_function, taus, nn_output, predicted_quantiles, validate_args=None): + def __init__( + self, + implicit_quantile_function, + taus, + nn_output, + predicted_quantiles, + validate_args=None, + ): self.predicted_quantiles = predicted_quantiles[0] self.taus = taus self.quantile_function = implicit_quantile_function @@ -46,9 +51,8 @@ class ImplicitQuantile(Distribution): @staticmethod def quantile_loss(quantile_forecast, target, tau): return torch.abs( - (quantile_forecast - target) - * ((target <= quantile_forecast).float() - tau) - ) + (quantile_forecast - target) * ((target <= quantile_forecast).float() - tau) + ) class TransformedImplicitQuantile(TransformedDistribution): @@ -63,4 +67,3 @@ class TransformedImplicitQuantile(TransformedDistribution): scale *= transform.scale p = self.base_dist.log_prob(x) return p * scale - diff --git a/pts/distributions/zero_inflated.py b/pts/distributions/zero_inflated.py index 266fc82..6befd7b 100644 --- a/pts/distributions/zero_inflated.py +++ b/pts/distributions/zero_inflated.py @@ -118,7 +118,10 @@ class ZeroInflatedNegativeBinomial(ZeroInflatedDistribution): def __init__(self, gate, total_count, probs=None, logits=None, validate_args=None): base_dist = NegativeBinomial( - 
total_count=total_count, probs=probs, logits=logits, validate_args=False, + total_count=total_count, + probs=probs, + logits=logits, + validate_args=False, ) base_dist._validate_args = validate_args diff --git a/pts/evaluation/__init__.py b/pts/evaluation/__init__.py deleted file mode 100644 index 95aefb3..0000000 --- a/pts/evaluation/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .backtest import make_evaluation_predictions, backtest_metrics -from .evaluator import Evaluator, MultivariateEvaluator diff --git a/pts/evaluation/backtest.py b/pts/evaluation/backtest.py deleted file mode 100644 index 35a8a7a..0000000 --- a/pts/evaluation/backtest.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - - -# Standard library imports -import logging -from typing import Dict, Iterator, NamedTuple, Optional, Tuple, Union - -# Third-party imports -import pandas as pd - -from pts.dataset import ( - DataEntry, - Dataset, - DatasetStatistics, - calculate_dataset_statistics, -) -from pts.model import Estimator, Predictor, Forecast -# First-party imports -from pts.transform import AdhocTransform, TransformedDataset -from .evaluator import Evaluator - - -def make_evaluation_predictions( - dataset: Dataset, predictor: Predictor, num_samples: int -) -> Tuple[Iterator[Forecast], Iterator[pd.Series]]: - """ - Return predictions on the last portion of predict_length time units of the - target. 
Such portion is cut before making predictions, such a function can - be used in evaluations where accuracy is evaluated on the last portion of - the target. - - Parameters - ---------- - dataset - Dataset where the evaluation will happen. Only the portion excluding - the prediction_length portion is used when making prediction. - predictor - Model used to draw predictions. - num_samples - Number of samples to draw on the model when evaluating. - - Returns - ------- - """ - - prediction_length = predictor.prediction_length - freq = predictor.freq - - def add_ts_dataframe(data_iterator: Iterator[DataEntry]) -> Iterator[DataEntry]: - for data_entry in data_iterator: - data = data_entry.copy() - index = pd.date_range( - start=data["start"], freq=freq, periods=data["target"].shape[-1], - ) - data["ts"] = pd.DataFrame(index=index, data=data["target"].transpose()) - yield data - - def ts_iter(dataset: Dataset) -> pd.DataFrame: - for data_entry in add_ts_dataframe(iter(dataset)): - yield data_entry["ts"] - - def truncate_target(data): - data = data.copy() - target = data["target"] - assert ( - target.shape[-1] >= prediction_length - ) # handles multivariate case (target_dim, history_length) - data["target"] = target[..., :-prediction_length] - return data - - # TODO filter out time series with target shorter than prediction length - # TODO or fix the evaluator so it supports missing values instead (all - # TODO the test set may be gone otherwise with such a filtering) - - dataset_trunc = TransformedDataset( - dataset, transformations=[AdhocTransform(truncate_target)] - ) - - return ( - predictor.predict(dataset_trunc, num_samples=num_samples), - ts_iter(dataset), - ) - - -train_dataset_stats_key = "train_dataset_stats" -test_dataset_stats_key = "test_dataset_stats" -estimator_key = "estimator" -agg_metrics_key = "agg_metrics" - - -def serialize_message(logger, message: str, variable): - logger.info(f"pts[{message}]: {variable}") - - -def backtest_metrics( - train_dataset: 
Optional[Dataset], - test_dataset: Dataset, - forecaster: Union[Estimator, Predictor], - evaluator=Evaluator(quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)), - num_samples: int = 100, - logging_file: Optional[str] = None, -): - """ - Parameters - ---------- - train_dataset - Dataset to use for training. - test_dataset - Dataset to use for testing. - forecaster - An estimator or a predictor to use for generating predictions. - evaluator - Evaluator to use. - num_samples - Number of samples to use when generating sample-based forecasts. - logging_file - If specified, information of the backtest is redirected to this file. - - Returns - ------- - tuple - A tuple of aggregate metrics and per-time-series metrics obtained by - training `forecaster` on `train_dataset` and evaluating the resulting - `evaluator` provided on the `test_dataset`. - """ - - if logging_file is not None: - log_formatter = logging.Formatter( - "[%(asctime)s %(levelname)s %(thread)d] %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - ) - logger = logging.getLogger(__name__) - handler = logging.FileHandler(logging_file) - handler.setFormatter(log_formatter) - logger.addHandler(handler) - else: - logger = logging.getLogger(__name__) - - if train_dataset is not None: - train_statistics = calculate_dataset_statistics(train_dataset) - serialize_message(logger, train_dataset_stats_key, train_statistics) - - test_statistics = calculate_dataset_statistics(test_dataset) - serialize_message(logger, test_dataset_stats_key, test_statistics) - - if isinstance(forecaster, Estimator): - serialize_message(logger, estimator_key, forecaster) - assert train_dataset is not None - predictor = forecaster.train(train_dataset) - else: - predictor = forecaster - - forecast_it, ts_it = make_evaluation_predictions( - test_dataset, predictor=predictor, num_samples=num_samples - ) - - agg_metrics, item_metrics = evaluator( - ts_it, forecast_it, num_series=len(test_dataset) - ) - - # we only log aggregate metrics for now 
as item metrics may be very large - for name, value in agg_metrics.items(): - serialize_message(logger, f"metric-{name}", value) - - if logging_file is not None: - # Close the file handler to avoid letting the file open. - # https://stackoverflow.com/questions/24816456/python-logging-wont-shutdown - logger.removeHandler(handler) - del logger, handler - - return agg_metrics, item_metrics - - -class BacktestInformation(NamedTuple): - train_dataset_stats: DatasetStatistics - test_dataset_stats: DatasetStatistics - estimator: Estimator - agg_metrics: Dict[str, float] - - # @staticmethod - # def make_from_log(log_file): - # with open(log_file, "r") as f: - # return BacktestInformation.make_from_log_contents( - # "\n".join(f.readlines()) - # ) - - # @staticmethod - # def make_from_log_contents(log_contents): - # messages = dict(re.findall(r"pts\[(.*)\]: (.*)", log_contents)) - - # # avoid to fail if a key is missing for instance in the case a run did - # # not finish so that we can still get partial information - # try: - # return BacktestInformation( - # train_dataset_stats=eval( - # messages[train_dataset_stats_key] - # ), # TODO: use load - # test_dataset_stats=eval( - # messages[test_dataset_stats_key] - # ), # TODO: use load - # estimator=load_code(messages[estimator_key]), - # agg_metrics={ - # k: load_code(v) - # for k, v in messages.items() - # if k.startswith("metric-") and v != "nan" - # }, - # ) - # except Exception as error: - # logging.error(error) - # return None diff --git a/pts/evaluation/evaluator.py b/pts/evaluation/evaluator.py deleted file mode 100644 index 3f50842..0000000 --- a/pts/evaluation/evaluator.py +++ /dev/null @@ -1,730 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. 
-# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -# Standard library imports -import multiprocessing -import sys - -from itertools import chain, tee -from typing import ( - Any, - Dict, - Iterable, - Iterator, - List, - Optional, - Tuple, - Union, - Callable, -) - -# Third-party imports -import numpy as np -import pandas as pd -from tqdm import tqdm - -# First-party imports -from pts.feature import get_seasonality -from pts.model import Quantile, Forecast - - -class Evaluator: - """ - Evaluator class, to compute accuracy metrics by comparing observations - to forecasts. - - Parameters - ---------- - quantiles - list of strings of the form 'p10' or floats in [0, 1] with - the quantile levels - seasonality - seasonality to use for seasonal_error, if nothing is passed - uses the default seasonality - for the given series frequency as returned by `get_seasonality` - alpha - Parameter of the MSIS metric from the M4 competition that - defines the confidence interval. - For alpha=0.05 (default) the 95% considered is considered in the metric, - see https://www.m4.unic.ac.cy/wp-content/uploads/2018/03/M4-Competitors-Guide.pdf - for more detail on MSIS - calculate_owa - Determines whether the OWA metric should also be calculated, - which is computationally expensive to evaluate and thus slows - down the evaluation process considerably. - By default False. - num_workers - The number of multiprocessing workers that will be used to process - the data in parallel. - Default is multiprocessing.cpu_count(). - Setting it to 0 means no multiprocessing. - chunk_size - Controls the approximate chunk size each workers handles at a time. - Default is 32. 
- """ - - default_quantiles = 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9 - - def __init__( - self, - quantiles: Iterable[Union[float, str]] = default_quantiles, - seasonality: Optional[int] = None, - alpha: float = 0.05, - calculate_owa: bool = False, - num_workers: Optional[int] = None, - chunk_size: Optional[int] = None, - ) -> None: - self.quantiles = tuple(map(Quantile.parse, quantiles)) - self.seasonality = seasonality - self.alpha = alpha - self.calculate_owa = calculate_owa - - self.num_workers = ( - num_workers if num_workers is not None else multiprocessing.cpu_count() - ) - self.chunk_size = chunk_size if chunk_size is not None else 32 - - def __call__( - self, - ts_iterator: Iterable[Union[pd.DataFrame, pd.Series]], - fcst_iterator: Iterable[Forecast], - num_series: Optional[int] = None, - ) -> Tuple[Dict[str, float], pd.DataFrame]: - """ - Compute accuracy metrics by comparing actual data to the forecasts. - - Parameters - ---------- - ts_iterator - iterator containing true target on the predicted range - fcst_iterator - iterator of forecasts on the predicted range - num_series - number of series of the iterator - (optional, only used for displaying progress) - - Returns - ------- - dict - Dictionary of aggregated metrics - pd.DataFrame - DataFrame containing per-time-series metrics - """ - ts_iterator = iter(ts_iterator) - fcst_iterator = iter(fcst_iterator) - - rows = [] - - with tqdm( - zip(ts_iterator, fcst_iterator), - total=num_series, - desc="Running evaluation", - ) as it, np.errstate(invalid="ignore"): - if self.num_workers > 0 and not sys.platform == "win32": - mp_pool = multiprocessing.Pool( - initializer=_worker_init(self), processes=self.num_workers - ) - rows = mp_pool.map( - func=_worker_fun, iterable=iter(it), chunksize=self.chunk_size, - ) - mp_pool.close() - mp_pool.join() - else: - for ts, forecast in it: - rows.append(self.get_metrics_per_ts(ts, forecast)) - - assert not any( - True for _ in ts_iterator - ), "ts_iterator has more 
elements than fcst_iterator" - - assert not any( - True for _ in fcst_iterator - ), "fcst_iterator has more elements than ts_iterator" - - if num_series is not None: - assert ( - len(rows) == num_series - ), f"num_series={num_series} did not match number of elements={len(rows)}" - - # If all entries of a target array are NaNs, the resulting metric will have value "masked". Pandas does not - # handle masked values correctly. Thus we set dtype=np.float64 to convert masked values back to NaNs which - # are handled correctly by pandas Dataframes during aggregation. - metrics_per_ts = pd.DataFrame(rows, dtype=np.float64) - return self.get_aggregate_metrics(metrics_per_ts) - - @staticmethod - def extract_pred_target( - time_series: Union[pd.Series, pd.DataFrame], forecast: Forecast - ) -> np.ndarray: - """ - - Parameters - ---------- - time_series - forecast - - Returns - ------- - np.ndarray - time series cut in the Forecast object dates - """ - assert forecast.index.intersection(time_series.index).equals(forecast.index), ( - "Cannot extract prediction target since the index of forecast is outside the index of target\n" - f"Index of forecast: {forecast.index}\n Index of target: {time_series.index}" - ) - - # cut the time series using the dates of the forecast object - return np.atleast_1d(np.squeeze(time_series.loc[forecast.index].transpose())) - - # This method is needed for the owa calculation - # It extracts the training sequence from the Series or DataFrame to a numpy array - @staticmethod - def extract_past_data( - time_series: Union[pd.Series, pd.DataFrame], forecast: Forecast - ) -> np.ndarray: - """ - - Parameters - ---------- - time_series - forecast - - Returns - ------- - np.ndarray - time series without the forecast dates - """ - - assert forecast.index.intersection(time_series.index).equals(forecast.index), ( - "Index of forecast is outside the index of target\n" - f"Index of forecast: {forecast.index}\n Index of target: {time_series.index}" - ) - - # 
Remove the prediction range - # If the prediction range is not in the end of the time series, - # everything after the prediction range is truncated - date_before_forecast = forecast.index[0] - forecast.index[0].freq - return np.atleast_1d( - np.squeeze(time_series.loc[:date_before_forecast].transpose()) - ) - - def seasonal_error(self, past_data: np.ndarray, forecast: Forecast) -> float: - r""" - .. math:: - - seasonal_error = mean(|Y[t] - Y[t-m]|) - - where m is the seasonal frequency - https://www.m4.unic.ac.cy/wp-content/uploads/2018/03/M4-Competitors-Guide.pdf - """ - # Check if the length of the time series is larger than the seasonal frequency - seasonality = ( - self.seasonality if self.seasonality else get_seasonality(forecast.freq) - ) - if seasonality < len(past_data): - forecast_freq = seasonality - else: - # edge case: the seasonal freq is larger than the length of ts - # revert to freq=1 - # logging.info('The seasonal frequency is larger than the length of the time series. 
Reverting to freq=1.') - forecast_freq = 1 - y_t = past_data[:-forecast_freq] - y_tm = past_data[forecast_freq:] - - seasonal_mae = np.mean(abs(y_t - y_tm)) - - return seasonal_mae if seasonal_mae is not np.ma.masked else np.nan - - def get_metrics_per_ts( - self, time_series: Union[pd.Series, pd.DataFrame], forecast: Forecast - ) -> Dict[str, Union[float, str, None]]: - pred_target = np.array(self.extract_pred_target(time_series, forecast)) - pred_target = np.ma.masked_invalid(pred_target) - - # required for seasonal_error and owa calculation - past_data = np.array(self.extract_past_data(time_series, forecast)) - past_data = np.ma.masked_invalid(past_data) - - try: - mean_fcst = forecast.mean - except: - mean_fcst = None - median_fcst = forecast.quantile(0.5) - seasonal_error = self.seasonal_error(past_data, forecast) - metrics = { - "item_id": forecast.item_id, - "MSE": self.mse(pred_target, mean_fcst) if mean_fcst is not None else None, - "abs_error": self.abs_error(pred_target, median_fcst), - "abs_target_sum": self.abs_target_sum(pred_target), - "abs_target_mean": self.abs_target_mean(pred_target), - "seasonal_error": seasonal_error, - "MASE": self.mase(pred_target, median_fcst, seasonal_error), - "MAPE": self.mape(pred_target, median_fcst), - "sMAPE": self.smape(pred_target, median_fcst), - "OWA": np.nan, # by default not calculated - "MSIS": self.msis( - pred_target, - forecast.quantile(self.alpha / 2), - forecast.quantile(1.0 - self.alpha / 2), - seasonal_error, - self.alpha, - ), - } - - if self.calculate_owa: - metrics["OWA"] = self.owa( - pred_target, - median_fcst, - past_data, - seasonal_error, - forecast.start_date, - ) - - for quantile in self.quantiles: - forecast_quantile = forecast.quantile(quantile.value) - - metrics[quantile.loss_name] = self.quantile_loss( - pred_target, forecast_quantile, quantile.value - ) - metrics[quantile.coverage_name] = self.coverage( - pred_target, forecast_quantile - ) - - return metrics - - def get_aggregate_metrics( 
- self, metric_per_ts: pd.DataFrame - ) -> Tuple[Dict[str, float], pd.DataFrame]: - agg_funs = { - "MSE": "mean", - "abs_error": "sum", - "abs_target_sum": "sum", - "abs_target_mean": "mean", - "seasonal_error": "mean", - "MASE": "mean", - "MAPE": "mean", - "sMAPE": "mean", - "OWA": "mean", - "MSIS": "mean", - } - for quantile in self.quantiles: - agg_funs[quantile.loss_name] = "sum" - agg_funs[quantile.coverage_name] = "mean" - - assert ( - set(metric_per_ts.columns) >= agg_funs.keys() - ), "The some of the requested item metrics are missing." - - totals = {key: metric_per_ts[key].agg(agg) for key, agg in agg_funs.items()} - - # derived metrics based on previous aggregate metrics - totals["RMSE"] = np.sqrt(totals["MSE"]) - - flag = totals["abs_target_mean"] == 0 - totals["NRMSE"] = np.divide( - totals["RMSE"] * (1 - flag), totals["abs_target_mean"] + flag - ) - - flag = totals["abs_target_sum"] == 0 - totals["ND"] = np.divide( - totals["abs_error"] * (1 - flag), totals["abs_target_sum"] + flag - ) - - all_qLoss_names = [quantile.weighted_loss_name for quantile in self.quantiles] - for quantile in self.quantiles: - totals[quantile.weighted_loss_name] = np.divide( - totals[quantile.loss_name], totals["abs_target_sum"] - ) - - totals["mean_wQuantileLoss"] = np.array( - [totals[ql] for ql in all_qLoss_names] - ).mean() - - totals["MAE_Coverage"] = np.mean( - [ - np.abs(totals[q.coverage_name] - np.array([q.value])) - for q in self.quantiles - ] - ) - return totals, metric_per_ts - - @staticmethod - def mse(target, forecast): - return np.mean(np.square(target - forecast)) - - @staticmethod - def abs_error(target, forecast): - return np.sum(np.abs(target - forecast)) - - @staticmethod - def quantile_loss(target, quantile_forecast, q): - return 2.0 * np.sum( - np.abs((quantile_forecast - target) * ((target <= quantile_forecast) - q)) - ) - - @staticmethod - def coverage(target, quantile_forecast): - return np.mean((target < quantile_forecast)) - - @staticmethod - def 
mase(target, forecast, seasonal_error): - r""" - .. math:: - - mase = mean(|Y - Y_hat|) / seasonal_error - - https://www.m4.unic.ac.cy/wp-content/uploads/2018/03/M4-Competitors-Guide.pdf - """ - flag = seasonal_error == 0 - return (np.mean(np.abs(target - forecast)) * (1 - flag)) / ( - seasonal_error + flag - ) - - @staticmethod - def mape(target, forecast): - r""" - .. math:: - - mape = mean(|Y - Y_hat| / |Y|)) - """ - - denominator = np.abs(target) - flag = denominator == 0 - - mape = np.mean((np.abs(target - forecast) * (1 - flag)) / (denominator + flag)) - return mape - - @staticmethod - def smape(target, forecast): - r""" - .. math:: - - smape = mean(2 * |Y - Y_hat| / (|Y| + |Y_hat|)) - - https://www.m4.unic.ac.cy/wp-content/uploads/2018/03/M4-Competitors-Guide.pdf - """ - - denominator = np.abs(target) + np.abs(forecast) - flag = denominator == 0 - - smape = 2 * np.mean( - (np.abs(target - forecast) * (1 - flag)) / (denominator + flag) - ) - return smape - - @staticmethod - def owa( - target: np.ndarray, - forecast: np.ndarray, - past_data: np.ndarray, - seasonal_error: float, - start_date: pd.Timestamp, - ) -> float: - r""" - .. 
math:: - - owa = 0.5*(smape/smape_naive + mase/mase_naive) - - https://www.m4.unic.ac.cy/wp-content/uploads/2018/03/M4-Competitors-Guide.pdf - """ - # avoid import error due to circular dependency - from gluonts.model.naive_2 import naive_2 - - # calculate the forecast of the seasonal naive predictor - naive_median_fcst = naive_2(past_data, len(target), freq=start_date.freqstr) - - owa = 0.5 * ( - ( - Evaluator.smape(target, forecast) - / Evaluator.smape(target, naive_median_fcst) - ) - + ( - Evaluator.mase(target, forecast, seasonal_error) - / Evaluator.mase(target, naive_median_fcst, seasonal_error) - ) - ) - - return owa - - @staticmethod - def msis(target, lower_quantile, upper_quantile, seasonal_error, alpha): - r""" - :math: - - msis = mean(U - L + 2/alpha * (L-Y) * I[YU]) /seasonal_error - - https://www.m4.unic.ac.cy/wp-content/uploads/2018/03/M4-Competitors-Guide.pdf - """ - numerator = np.mean( - upper_quantile - - lower_quantile - + 2.0 / alpha * (lower_quantile - target) * (target < lower_quantile) - + 2.0 / alpha * (target - upper_quantile) * (target > upper_quantile) - ) - - flag = seasonal_error == 0 - return (numerator * (1 - flag)) / (seasonal_error + flag) - - @staticmethod - def abs_target_sum(target): - return np.sum(np.abs(target)) - - @staticmethod - def abs_target_mean(target): - return np.mean(np.abs(target)) - - -class MultivariateEvaluator(Evaluator): - """ - - The MultivariateEvaluator class owns functionality for evaluating - multidimensional target arrays of shape - (target_dimensionality, prediction_length). - - Evaluations of individual dimensions will be stored with the corresponding - dimension prefix and contain the metrics calculated by only this dimension. - Metrics with the plain metric name correspond to metrics calculated over - all dimensions. - Additionally, the user can provide additional aggregation functions that - first aggregate the target and forecast over dimensions and then calculate - the metric. 
These metrics will be prefixed with m__ - - The evaluation dimensions can be set by the user. - - Example: - {'0_MSE': 0.004307240342677687, # MSE of dimension 0 - '0_abs_error': 1.6246897801756859, - '1_MSE': 0.003949341769475723, # MSE of dimension 1 - '1_abs_error': 1.5052175521850586, - 'MSE': 0.004128291056076705, # MSE of all dimensions - 'abs_error': 3.1299073323607445, - 'm_sum_MSE': 0.02 # MSE of aggregated target and aggregated forecast - (if target_agg_funcs is set). - 'm_sum_abs_error': 4.2} - """ - - def __init__( - self, - quantiles: Iterable[Union[float, str]] = np.linspace(0.1, 0.9, 9), - seasonality: Optional[int] = None, - alpha: float = 0.05, - eval_dims: List[int] = None, - target_agg_funcs: Dict[str, Callable] = {}, - ) -> None: - """ - - Parameters - ---------- - quantiles - list of strings of the form 'p10' or floats in [0, 1] with the - quantile levels - seasonality - seasonality to use for seasonal_error, if nothing is passed uses - the default seasonality for the given series frequency as - returned by `get_seasonality` - alpha - parameter of the MSIS metric that defines the CI, - e.g., for alpha=0.05 the 95% CI is considered in the metric. - eval_dims - dimensions of the target that will be evaluated. - target_agg_funcs - pass key-value pairs that define aggregation functions over the - dimension axis. Useful to compute metrics over aggregated target - and forecast (typically sum or mean). 
- """ - super().__init__(quantiles=quantiles, seasonality=seasonality, alpha=alpha) - self._eval_dims = eval_dims - self.target_agg_funcs = target_agg_funcs - - @staticmethod - def extract_target_by_dim( - it_iterator: Iterator[pd.DataFrame], dim: int - ) -> Iterator[pd.DataFrame]: - for i in it_iterator: - yield (i[dim]) - - @staticmethod - def extract_forecast_by_dim( - forecast_iterator: Iterator[Forecast], dim: int - ) -> Iterator[Forecast]: - for forecast in forecast_iterator: - yield forecast.copy_dim(dim) - - @staticmethod - def extract_aggregate_target( - it_iterator: Iterator[pd.DataFrame], agg_fun: Callable - ) -> Iterator[pd.DataFrame]: - for i in it_iterator: - yield i.agg(agg_fun, axis=1) - - @staticmethod - def extract_aggregate_forecast( - forecast_iterator: Iterator[Forecast], agg_fun: Callable - ) -> Iterator[Forecast]: - for forecast in forecast_iterator: - yield forecast.copy_aggregate(agg_fun) - - @staticmethod - def peek(iterator: Iterator[Any]) -> Tuple[Any, Iterator[Any]]: - peeked_object = iterator.__next__() - iterator = chain([peeked_object], iterator) - return peeked_object, iterator - - @staticmethod - def get_target_dimensionality(forecast: Forecast) -> int: - target_dim = forecast.dim() - assert target_dim > 1, ( - f"the dimensionality of the forecast should be larger than 1, " - f"but got {target_dim}. " - f"Please use the Evaluator to evaluate 1D forecasts." 
- ) - return target_dim - - def get_eval_dims(self, target_dimensionality: int) -> List[int]: - eval_dims = ( - self._eval_dims - if self._eval_dims is not None - else list(range(0, target_dimensionality)) - ) - assert max(eval_dims) < target_dimensionality, ( - f"eval dims should range from 0 to target_dimensionality - 1, " - f"but got max eval_dim {max(eval_dims)}" - ) - return eval_dims - - def calculate_aggregate_multivariate_metrics( - self, - ts_iterator: Iterator[pd.DataFrame], - forecast_iterator: Iterator[Forecast], - agg_fun: Callable, - ) -> Dict[str, float]: - """ - - Parameters - ---------- - ts_iterator - Iterator over time series - forecast_iterator - Iterator over forecasts - agg_fun - aggregation function - Returns - ------- - Dict[str, float] - dictionary with aggregate datasets metrics - """ - agg_metrics, _ = super(MultivariateEvaluator, self).__call__( - self.extract_aggregate_target(ts_iterator, agg_fun), - self.extract_aggregate_forecast(forecast_iterator, agg_fun), - ) - return agg_metrics - - def calculate_aggregate_vector_metrics( - self, all_agg_metrics: Dict[str, float], all_metrics_per_ts: pd.DataFrame, - ) -> Dict[str, float]: - """ - - Parameters - ---------- - all_agg_metrics - dictionary with aggregate metrics of individual dimensions - all_metrics_per_ts - DataFrame containing metrics for all time series of all evaluated - dimensions - - Returns - ------- - Dict[str, float] - dictionary with aggregate metrics (of individual (evaluated) - dimensions and the entire vector) - """ - vector_aggregate_metrics, _ = self.get_aggregate_metrics(all_metrics_per_ts) - for key, value in vector_aggregate_metrics.items(): - all_agg_metrics[key] = value - return all_agg_metrics - - def __call__( - self, - ts_iterator: Iterable[pd.DataFrame], - fcst_iterator: Iterable[Forecast], - num_series=None, - ) -> Tuple[Dict[str, float], pd.DataFrame]: - ts_iterator = iter(ts_iterator) - fcst_iterator = iter(fcst_iterator) - - all_agg_metrics = dict() - 
all_metrics_per_ts = list() - - peeked_forecast, fcst_iterator = self.peek(fcst_iterator) - target_dimensionality = self.get_target_dimensionality(peeked_forecast) - eval_dims = self.get_eval_dims(target_dimensionality) - - ts_iterator_set = tee( - ts_iterator, target_dimensionality + len(self.target_agg_funcs) - ) - fcst_iterator_set = tee( - fcst_iterator, target_dimensionality + len(self.target_agg_funcs) - ) - - for dim in eval_dims: - agg_metrics, metrics_per_ts = super(MultivariateEvaluator, self).__call__( - self.extract_target_by_dim(ts_iterator_set[dim], dim), - self.extract_forecast_by_dim(fcst_iterator_set[dim], dim), - ) - - all_metrics_per_ts.append(metrics_per_ts) - - for metric, value in agg_metrics.items(): - all_agg_metrics[f"{dim}_{metric}"] = value - - all_metrics_per_ts = pd.concat(all_metrics_per_ts) - all_agg_metrics = self.calculate_aggregate_vector_metrics( - all_agg_metrics, all_metrics_per_ts - ) - - if self.target_agg_funcs: - multivariate_metrics = { - agg_fun_name: self.calculate_aggregate_multivariate_metrics( - ts_iterator_set[-(index + 1)], - fcst_iterator_set[-(index + 1)], - agg_fun, - ) - for index, (agg_fun_name, agg_fun) in enumerate( - self.target_agg_funcs.items() - ) - } - - for key, metric_dict in multivariate_metrics.items(): - prefix = f"m_{key}_" - for metric, value in metric_dict.items(): - all_agg_metrics[prefix + metric] = value - - return all_agg_metrics, all_metrics_per_ts - - -# This is required for the multiprocessing to work. -_worker_evaluator: Optional[Evaluator] = None - - -def _worker_init(evaluator: Evaluator): - global _worker_evaluator - _worker_evaluator = evaluator - - -def _worker_fun(inp: tuple): - ts, forecast = inp - global _worker_evaluator - assert isinstance( - _worker_evaluator, Evaluator - ), "Something went wrong with the worker initialization." 
- return _worker_evaluator.get_metrics_per_ts(ts, forecast) diff --git a/pts/exception.py b/pts/exception.py deleted file mode 100644 index 0116429..0000000 --- a/pts/exception.py +++ /dev/null @@ -1,3 +0,0 @@ -def assert_pts(condition: bool, message: str, *args, **kwargs) -> None: - if not condition: - raise Exception(message.format(*args, **kwargs)) diff --git a/pts/feature/__init__.py b/pts/feature/__init__.py index b061e76..3f16dfb 100644 --- a/pts/feature/__init__.py +++ b/pts/feature/__init__.py @@ -1,23 +1,4 @@ from .holiday import ( - SPECIAL_DATE_FEATURES, - SpecialDateFeatureSet, CustomDateFeatureSet, CustomHolidayFeatureSet, - squared_exponential_kernel, - exponential_kernel, ) -from .lag import get_lags_for_frequency, get_fourier_lags_for_frequency -from .time_feature import ( - DayOfMonth, - DayOfWeek, - DayOfYear, - HourOfDay, - MinuteOfHour, - MonthOfYear, - TimeFeature, - WeekOfYear, - FourierDateFeatures, - time_features_from_frequency_str, - fourier_time_features_from_frequency_str, -) -from .utils import get_granularity, get_seasonality diff --git a/pts/feature/holiday.py b/pts/feature/holiday.py index 14f262a..e4f8707 100644 --- a/pts/feature/holiday.py +++ b/pts/feature/holiday.py @@ -1,221 +1,9 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- -from typing import List, Callable - +from typing import Callable, List import numpy as np import pandas as pd -from pandas.tseries.holiday import ( - TH, - SU, - EasterMonday, - GoodFriday, - Holiday, - USColumbusDay, - USLaborDay, - USMartinLutherKingJr, - USMemorialDay, - USPresidentsDay, - USThanksgivingDay, -) -from pandas.tseries.offsets import DateOffset, Day, Easter +from pandas.tseries.holiday import Holiday -# This is 183 to cover half a year (in both directions), also for leap years -# plus a week and a half to cover holidays offset by a week e.g. easter etc -MAX_WINDOW = 192 - - -def distance_to_holiday(holiday): - def distance_to_day(index): - holiday_date = holiday.dates( - index - pd.Timedelta(days=MAX_WINDOW), - index + pd.Timedelta(days=MAX_WINDOW), - ) - assert ( - len(holiday_date) != 0 - ), f"No closest holiday for the date index {index} found." - # It sometimes returns two dates if it is exactly half a year after the - # holiday. In this case, the smaller distance (182 days) is returned. 
- return (index - holiday_date[0]).days - - return distance_to_day - - -EasterSunday = Holiday("Easter Sunday", month=1, day=1, offset=[Easter(), Day(0)]) -NewYearsDay = Holiday("New Years Day", month=1, day=1) -SuperBowl = Holiday("Superbowl", month=2, day=1, offset=DateOffset(weekday=SU(1))) -MothersDay = Holiday("Mothers Day", month=5, day=1, offset=DateOffset(weekday=SU(2))) -IndependenceDay = Holiday("Independence Day", month=7, day=4) -ChristmasEve = Holiday("Christmas", month=12, day=24) -ChristmasDay = Holiday("Christmas", month=12, day=25) -NewYearsEve = Holiday("New Years Eve", month=12, day=31) -BlackFriday = Holiday( - "Black Friday", month=11, day=1, offset=[pd.DateOffset(weekday=TH(4)), Day(1)] -) -CyberMonday = Holiday( - "Cyber Monday", month=11, day=1, offset=[pd.DateOffset(weekday=TH(4)), Day(4)], -) - - -NEW_YEARS_DAY = "new_years_day" -MARTIN_LUTHER_KING_DAY = "martin_luther_king_day" -SUPERBOWL = "superbowl" -PRESIDENTS_DAY = "presidents_day" -GOOD_FRIDAY = "good_friday" -EASTER_SUNDAY = "easter_sunday" -EASTER_MONDAY = "easter_monday" -MOTHERS_DAY = "mothers_day" -INDEPENDENCE_DAY = "independence_day" -LABOR_DAY = "labor_day" -MEMORIAL_DAY = "memorial_day" -COLUMBUS_DAY = "columbus_day" -THANKSGIVING = "thanksgiving" -CHRISTMAS_EVE = "christmas_eve" -CHRISTMAS_DAY = "christmas_day" -NEW_YEARS_EVE = "new_years_eve" -BLACK_FRIDAY = "black_friday" -CYBER_MONDAY = "cyber_monday" - - -SPECIAL_DATE_FEATURES = { - NEW_YEARS_DAY: distance_to_holiday(NewYearsDay), - MARTIN_LUTHER_KING_DAY: distance_to_holiday(USMartinLutherKingJr), - SUPERBOWL: distance_to_holiday(SuperBowl), - PRESIDENTS_DAY: distance_to_holiday(USPresidentsDay), - GOOD_FRIDAY: distance_to_holiday(GoodFriday), - EASTER_SUNDAY: distance_to_holiday(EasterSunday), - EASTER_MONDAY: distance_to_holiday(EasterMonday), - MOTHERS_DAY: distance_to_holiday(MothersDay), - INDEPENDENCE_DAY: distance_to_holiday(IndependenceDay), - LABOR_DAY: distance_to_holiday(USLaborDay), - MEMORIAL_DAY: 
distance_to_holiday(USMemorialDay), - COLUMBUS_DAY: distance_to_holiday(USColumbusDay), - THANKSGIVING: distance_to_holiday(USThanksgivingDay), - CHRISTMAS_EVE: distance_to_holiday(ChristmasEve), - CHRISTMAS_DAY: distance_to_holiday(ChristmasDay), - NEW_YEARS_EVE: distance_to_holiday(NewYearsEve), - BLACK_FRIDAY: distance_to_holiday(BlackFriday), - CYBER_MONDAY: distance_to_holiday(CyberMonday), -} - - -# Kernel functions -def indicator(distance): - return float(distance == 0) - - -def exponential_kernel(alpha=1.0, tol=1e-9): - def kernel(distance): - kernel_value = np.exp(-alpha * np.abs(distance)) - if kernel_value > tol: - return kernel_value - else: - return 0.0 - - return kernel - - -def squared_exponential_kernel(alpha=1.0, tol=1e-9): - def kernel(distance): - kernel_value = np.exp(-alpha * np.abs(distance) ** 2) - if kernel_value > tol: - return kernel_value - else: - return 0.0 - - return kernel - - -class SpecialDateFeatureSet: - """ - Implements calculation of holiday features. The SpecialDateFeatureSet is - applied on a pandas Series with Datetimeindex and returns a 2D array of - the shape (len(dates), num_features), where num_features are the number - of holidays. - - Note that for lower than daily granularity the distance to the holiday is - still computed on a per-day basis. - - Example use: - - >>> from pts.features import ( - ... squared_exponential_kernel, - ... SpecialDateFeatureSet, - ... CHRISTMAS_DAY, - ... CHRISTMAS_EVE - ... ) - >>> import pandas as pd - >>> sfs = SpecialDateFeatureSet([CHRISTMAS_EVE, CHRISTMAS_DAY]) - >>> date_indices = pd.date_range( - ... start="2016-12-24", - ... end="2016-12-31", - ... freq='D' - ... 
) - >>> sfs(date_indices) - array([[1., 0., 0., 0., 0., 0., 0., 0.], - [0., 1., 0., 0., 0., 0., 0., 0.]]) - - Example use for using a squared exponential kernel: - - >>> kernel = squared_exponential_kernel(alpha=1.0) - >>> sfs = SpecialDateFeatureSet([CHRISTMAS_EVE, CHRISTMAS_DAY], kernel) - >>> sfs(date_indices) - array([[1.00000000e+00, 3.67879441e-01, 1.83156389e-02, 1.23409804e-04, - 1.12535175e-07, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00], - [3.67879441e-01, 1.00000000e+00, 3.67879441e-01, 1.83156389e-02, - 1.23409804e-04, 1.12535175e-07, 0.00000000e+00, 0.00000000e+00]]) - - """ - - def __init__( - self, - feature_names: List[str], - kernel_function: Callable[[int], int] = indicator, - ): - """ - Parameters - ---------- - feature_names - list of strings with holiday names for which features should be created. - kernel_function - kernel function to pass the feature value based - on distance in days. Can be indicator function (default), - exponential_kernel, squared_exponential_kernel or user defined. - """ - self.feature_names = feature_names - self.num_features = len(feature_names) - self.kernel_function = kernel_function - - def __call__(self, dates): - """ - Transform a pandas series with timestamps to holiday features. - - Parameters - ---------- - dates - Pandas series with Datetimeindex timestamps. - """ - return np.vstack( - [ - np.hstack( - [ - self.kernel_function(SPECIAL_DATE_FEATURES[feat_name](index)) - for index in dates - ] - ) - for feat_name in self.feature_names - ] - ) +from gluonts.time_feature.holiday import indicator, distance_to_holiday class CustomDateFeatureSet: @@ -230,7 +18,7 @@ class CustomDateFeatureSet: Example use: >>> import pandas as pd - >>> cfs = CustomDateFeatureSet([pd.to_datetime('20191129', format='%Y%m%d'), + >>> cfs = CustomDateFeatureSet([pd.to_datetime('20191129', format='%Y%m%d'), ... pd.to_datetime('20200101', format='%Y%m%d')]) >>> date_indices = pd.date_range( ... 
start="2019-11-24", @@ -245,7 +33,7 @@ class CustomDateFeatureSet: Example use for using a squared exponential kernel: >>> kernel = squared_exponential_kernel(alpha=0.5) - >>> cfs = CustomDateFeatureSet([pd.to_datetime('20191129', format='%Y%m%d'), + >>> cfs = CustomDateFeatureSet([pd.to_datetime('20191129', format='%Y%m%d'), ... pd.to_datetime('20200101', format='%Y%m%d')], kernel) >>> cfs(date_indices) array([[3.72665317e-06, 3.35462628e-04, 1.11089965e-02, 1.35335283e-01, @@ -287,20 +75,14 @@ class CustomDateFeatureSet: dates Pandas series with Datetimeindex timestamps. """ - return ( - np.vstack( - [ - np.hstack( - [ - self.kernel_function((index - ref_date).days) - for index in dates - ] - ) - for ref_date in self.reference_dates - ] - ) - .sum(0, keepdims=True) - ) + return np.vstack( + [ + np.hstack( + [self.kernel_function((index - ref_date).days) for index in dates] + ) + for ref_date in self.reference_dates + ] + ).sum(0, keepdims=True) class CustomHolidayFeatureSet: @@ -383,4 +165,3 @@ class CustomHolidayFeatureSet: for custom_holiday in self.custom_holidays ] ) - diff --git a/pts/feature/lag.py b/pts/feature/lag.py deleted file mode 100644 index c4c0982..0000000 --- a/pts/feature/lag.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- -# Standard library imports -from typing import List, Optional - -# Third-party imports -import numpy as np -from pandas.tseries.frequencies import to_offset - -from .utils import get_granularity - - -def _make_lags(middle: int, delta: int) -> np.ndarray: - """ - Create a set of lags around a middle point including +/- delta - """ - return np.arange(middle - delta, middle + delta + 1).tolist() - - -def get_lags_for_frequency( - freq_str: str, lag_ub: int = 1200, num_lags: Optional[int] = None -) -> List[int]: - """ - Generates a list of lags that that are appropriate for the given frequency string. - - By default all frequencies have the following lags: [1, 2, 3, 4, 5, 6, 7]. - Remaining lags correspond to the same `season` (+/- `delta`) in previous `k` cycles. - Here `delta` and `k` are chosen according to the existing code. - - Parameters - ---------- - - freq_str - Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. - - lag_ub - The maximum value for a lag. - - num_lags - Maximum number of lags; by default all generated lags are returned - """ - - multiple, granularity = get_granularity(freq_str) - - # Lags are target values at the same `season` (+/- delta) but in the previous cycle. - def _make_lags_for_minute(multiple, num_cycles=3): - # We use previous ``num_cycles`` hours to generate lags - return [_make_lags(k * 60 // multiple, 2) for k in range(1, num_cycles + 1)] - - def _make_lags_for_hour(multiple, num_cycles=7): - # We use previous ``num_cycles`` days to generate lags - return [_make_lags(k * 24 // multiple, 1) for k in range(1, num_cycles + 1)] - - def _make_lags_for_day(multiple, num_cycles=4): - # We use previous ``num_cycles`` weeks to generate lags - # We use the last month (in addition to 4 weeks) to generate lag. 
- return [_make_lags(k * 7 // multiple, 1) for k in range(1, num_cycles + 1)] + [ - _make_lags(30 // multiple, 1) - ] - - def _make_lags_for_week(multiple, num_cycles=3): - # We use previous ``num_cycles`` years to generate lags - # Additionally, we use previous 4, 8, 12 weeks - return [_make_lags(k * 52 // multiple, 1) for k in range(1, num_cycles + 1)] + [ - [4 // multiple, 8 // multiple, 12 // multiple] - ] - - def _make_lags_for_month(multiple, num_cycles=3): - # We use previous ``num_cycles`` years to generate lags - return [_make_lags(k * 12 // multiple, 1) for k in range(1, num_cycles + 1)] - - # multiple, granularity = get_granularity(freq_str) - offset = to_offset(freq_str) - - if offset.name == "M": - lags = _make_lags_for_month(offset.n) - elif offset.name == "W-SUN" or offset.name == "W-MON": - lags = _make_lags_for_week(offset.n) - elif offset.name == "D": - lags = _make_lags_for_day(offset.n) + _make_lags_for_week(offset.n / 7.0) - elif offset.name == "B": - # todo find good lags for business day - lags = [] - elif offset.name == "H": - lags = ( - _make_lags_for_hour(offset.n) - + _make_lags_for_day(offset.n / 24.0) - + _make_lags_for_week(offset.n / (24.0 * 7)) - ) - # minutes - elif offset.name == "T": - lags = ( - _make_lags_for_minute(offset.n) - + _make_lags_for_hour(offset.n / 60.0) - + _make_lags_for_day(offset.n / (60.0 * 24)) - + _make_lags_for_week(offset.n / (60.0 * 24 * 7)) - ) - else: - raise Exception("invalid frequency") - - # flatten lags list and filter - lags = [int(lag) for sub_list in lags for lag in sub_list if 7 < lag <= lag_ub] - lags = [1, 2, 3, 4, 5, 6, 7] + sorted(list(set(lags))) - - return lags[:num_lags] - - -def get_fourier_lags_for_frequency(freq_str: str, num_lags: Optional[int] = None) -> List[int]: - offset = to_offset(freq_str) - granularity = offset.name - - if granularity == "M": - lags = [[1, 12]] - elif granularity == "D": - lags = [[1, 7, 14]] - elif granularity == "B": - lags = [[1, 2]] - elif granularity == 
"H": - lags = [[1, 24, 168]] - elif granularity == "min": - lags = [[1, 4, 12, 24, 48]] - else: - lags = [[1]] - - # use less lags - output_lags = list([int(lag) for sub_list in lags for lag in sub_list]) - output_lags = sorted(list(set(output_lags))) - return output_lags[:num_lags] diff --git a/pts/feature/time_feature.py b/pts/feature/time_feature.py deleted file mode 100644 index cc9cec9..0000000 --- a/pts/feature/time_feature.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- - -from abc import ABC, abstractmethod -from typing import List - -import numpy as np -import pandas as pd -from pandas.tseries.frequencies import to_offset - -from pts.core.component import validated -from .utils import get_granularity - - -class TimeFeature(ABC): - @validated() - def __init__(self, normalized: bool = True): - self.normalized = normalized - - @abstractmethod - def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - pass - - -class MinuteOfHour(TimeFeature): - """ - Minute of hour encoded as value between [-0.5, 0.5] - """ - - def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - if self.normalized: - return index.minute / 59.0 - 0.5 - else: - return index.minute.map(float) - - -class HourOfDay(TimeFeature): - """ - Hour of day encoded as value between [-0.5, 0.5] - """ - - def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - if self.normalized: - return index.hour / 23.0 - 0.5 - else: - return index.hour.map(float) - - -class DayOfWeek(TimeFeature): - """ - Hour of day encoded as value between [-0.5, 0.5] - """ - - def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - if self.normalized: - return index.dayofweek / 6.0 - 0.5 - else: - return index.dayofweek.map(float) - - -class DayOfMonth(TimeFeature): - """ - Day of month encoded as value between [-0.5, 0.5] - """ - - def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - if self.normalized: - return index.day / 30.0 - 0.5 - else: - return index.day.map(float) - - -class DayOfYear(TimeFeature): - """ - Day of year encoded as value between [-0.5, 0.5] - """ - - def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - if self.normalized: - return index.dayofyear / 364.0 - 0.5 - else: - return index.dayofyear.map(float) - - -class MonthOfYear(TimeFeature): - """ - Month of year encoded as value between [-0.5, 0.5] - """ - - def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - if self.normalized: - return index.month / 11.0 - 0.5 - else: - return 
index.month.map(float) - - -class WeekOfYear(TimeFeature): - """ - Week of year encoded as value between [-0.5, 0.5] - """ - - def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - if self.normalized: - return pd.Int64Index(index.isocalendar().week) / 51.0 - 0.5 - else: - return pd.Int64Index(index.isocalendar().week).map(float) - - -class FourierDateFeatures(TimeFeature): - @validated() - def __init__(self, freq: str) -> None: - super().__init__() - # reoccurring freq - freqs = [ - "month", - "day", - "hour", - "minute", - "weekofyear", - "weekday", - "dayofweek", - "dayofyear", - "daysinmonth", - ] - - assert freq in freqs - self.freq = freq - - def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: - values = getattr(index, self.freq) - num_values = max(values) + 1 - steps = [x * 2.0 * np.pi / num_values for x in values] - return np.vstack([np.cos(steps), np.sin(steps)]) - - -def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: - """ - Returns a list of time features that will be appropriate for the given frequency string. - - Parameters - ---------- - - freq_str - Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. 
- - """ - _, granularity = get_granularity(freq_str) - if granularity == "M": - feature_classes = [MonthOfYear] - elif granularity == "W": - feature_classes = [DayOfMonth, WeekOfYear] - elif granularity in ["D", "B"]: - feature_classes = [DayOfWeek, DayOfMonth, DayOfYear] - elif granularity == "H": - feature_classes = [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear] - elif granularity in ["min", "T"]: - feature_classes = [MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear] - else: - supported_freq_msg = f""" - Unsupported frequency {freq_str} - - The following frequencies are supported: - - M - monthly - W - week - D - daily - H - hourly - min - minutely - """ - raise RuntimeError(supported_freq_msg) - - return [cls() for cls in feature_classes] - - -def fourier_time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: - offset = to_offset(freq_str) - granularity = offset.name - - features = { - "M": ["weekofyear"], - "W-SUN": ["daysinmonth", "weekofyear"], - "W-MON": ["daysinmonth", "weekofyear"], - "D": ["dayofweek"], - "B": ["dayofweek", "dayofyear"], - "H": ["hour", "dayofweek"], - "min": ["minute", "hour", "dayofweek"], - "T": ["minute", "hour", "dayofweek"], - } - - assert granularity in features, f"freq {granularity} not supported" - - feature_classes: List[TimeFeature] = [ - FourierDateFeatures(freq=freq) for freq in features[granularity] - ] - return feature_classes diff --git a/pts/feature/utils.py b/pts/feature/utils.py deleted file mode 100644 index d30fdf2..0000000 --- a/pts/feature/utils.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. 
This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - - -import re -from functools import lru_cache -from typing import Tuple - - -def get_granularity(freq_str: str) -> Tuple[int, str]: - """ - Splits a frequency string such as "7D" into the multiple 7 and the base - granularity "D". - - Parameters - ---------- - - freq_str - Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. - """ - freq_regex = r"\s*((\d+)?)\s*([^\d]\w*)" - m = re.match(freq_regex, freq_str) - assert m is not None, "Cannot parse frequency string: %s" % freq_str - groups = m.groups() - multiple = int(groups[1]) if groups[1] is not None else 1 - granularity = groups[2] - return multiple, granularity - - -@lru_cache() -def get_seasonality(freq: str) -> int: - """ - Returns the default seasonality for a given freq str. E.g. for - - 2H -> 12 - - """ - match = re.match(r"(\d*)(\w+)", freq) - assert match, "Cannot match freq regex" - mult, base_freq = match.groups() - multiple = int(mult) if mult else 1 - - seasonalities = {"H": 24, "D": 1, "W": 1, "M": 12, "B": 5} - if base_freq in seasonalities: - seasonality = seasonalities[base_freq] - else: - seasonality = 1 - if seasonality % multiple != 0: - # logging.warning( - # f"multiple {multiple} does not divide base " - # f"seasonality {seasonality}." 
- # f"Falling back to seasonality 1" - # ) - return 1 - return seasonality // multiple diff --git a/pts/model/__init__.py b/pts/model/__init__.py index 1c99bdb..43624d4 100644 --- a/pts/model/__init__.py +++ b/pts/model/__init__.py @@ -1,5 +1,2 @@ -from .estimator import Estimator, PTSEstimator -from .forecast import Forecast, SampleForecast, QuantileForecast, DistributionForecast -from .predictor import Predictor, PTSPredictor -from .quantile import Quantile -from .utils import get_module_forward_input_names, copy_parameters, weighted_average +from .utils import get_module_forward_input_names, weighted_average +from .estimator import PyTorchEstimator diff --git a/pts/model/deepar/deepar_estimator.py b/pts/model/deepar/deepar_estimator.py index 965f5b9..7e8c24b 100644 --- a/pts/model/deepar/deepar_estimator.py +++ b/pts/model/deepar/deepar_estimator.py @@ -1,19 +1,17 @@ +from pts.model.utils import get_module_forward_input_names from typing import List, Optional import numpy as np import torch import torch.nn as nn -from pts import Trainer -from pts.dataset import FieldName -from pts.feature import ( +from gluonts.dataset.field_names import FieldName +from gluonts.time_feature import ( TimeFeature, get_lags_for_frequency, time_features_from_frequency_str, ) -from pts.model import PTSEstimator, Predictor, PTSPredictor, copy_parameters -from pts.modules import DistributionOutput, StudentTOutput -from pts.transform import ( +from gluonts.transform import ( Transformation, Chain, RemoveFields, @@ -26,10 +24,19 @@ from pts.transform import ( InstanceSplitter, ExpectedNumInstanceSampler, ) +from gluonts.torch.support.util import copy_parameters +from gluonts.torch.model.predictor import PyTorchPredictor +from gluonts.torch.modules.distribution_output import DistributionOutput +from gluonts.model.predictor import Predictor + +from pts import Trainer +from pts.model import PyTorchEstimator +from pts.modules import StudentTOutput + from .deepar_network import 
DeepARTrainingNetwork, DeepARPredictionNetwork -class DeepAREstimator(PTSEstimator): +class DeepAREstimator(PyTorchEstimator): def __init__( self, freq: str, @@ -115,10 +122,14 @@ class DeepAREstimator(PTSEstimator): ) + [ AsNumpyArray( - field=FieldName.FEAT_STATIC_CAT, expected_ndim=1, dtype=np.long, + field=FieldName.FEAT_STATIC_CAT, + expected_ndim=1, + dtype=np.long, ), AsNumpyArray( - field=FieldName.FEAT_STATIC_REAL, expected_ndim=1, dtype=self.dtype, + field=FieldName.FEAT_STATIC_REAL, + expected_ndim=1, + dtype=self.dtype, ), AsNumpyArray( field=FieldName.TARGET, @@ -218,13 +229,14 @@ class DeepAREstimator(PTSEstimator): ).to(device) copy_parameters(trained_network, prediction_network) + input_names = get_module_forward_input_names(prediction_network) - return PTSPredictor( + return PyTorchPredictor( input_transform=transformation, + input_names=input_names, prediction_net=prediction_network, batch_size=self.trainer.batch_size, freq=self.freq, prediction_length=self.prediction_length, device=device, - dtype=self.dtype, ) diff --git a/pts/model/deepar/deepar_network.py b/pts/model/deepar/deepar_network.py index 1a68742..bd98e05 100644 --- a/pts/model/deepar/deepar_network.py +++ b/pts/model/deepar/deepar_network.py @@ -5,9 +5,10 @@ import torch import torch.nn as nn from torch.distributions import Distribution -from pts.core.component import validated +from gluonts.core.component import validated +from gluonts.torch.modules.distribution_output import DistributionOutput from pts.model import weighted_average -from pts.modules import DistributionOutput, MeanScaler, NOPScaler, FeatureEmbedder +from pts.modules import MeanScaler, NOPScaler, FeatureEmbedder def prod(xs): @@ -18,7 +19,6 @@ def prod(xs): class DeepARNetwork(nn.Module): - @validated() def __init__( self, @@ -144,7 +144,7 @@ class DeepARNetwork(nn.Module): past_time_feat[:, self.history_length - self.context_length :, ...], future_time_feat, ), - dim=1 + dim=1, ) sequence = torch.cat((past_target, 
future_target), dim=1) sequence_length = self.history_length + self.prediction_length @@ -154,7 +154,7 @@ class DeepARNetwork(nn.Module): sequence=sequence, sequence_length=sequence_length, indices=self.lags_seq, - subsequences_length=subsequences_length + subsequences_length=subsequences_length, ) # scale is computed on the context length last units of the past target diff --git a/pts/model/deepvar/deepvar_estimator.py b/pts/model/deepvar/deepvar_estimator.py index e1aaed6..5371745 100644 --- a/pts/model/deepvar/deepvar_estimator.py +++ b/pts/model/deepvar/deepvar_estimator.py @@ -10,7 +10,7 @@ from pts.feature import ( fourier_time_features_from_frequency_str, get_fourier_lags_for_frequency, ) -from pts.model import PTSEstimator, PTSPredictor, copy_parameters +from pts.model import PyTorchEstimator, PyTorchPredictor, copy_parameters from pts.modules import DistributionOutput, LowRankMultivariateNormalOutput from pts.transform import ( Transformation, @@ -34,7 +34,7 @@ from pts.transform import ( from .deepvar_network import DeepVARTrainingNetwork, DeepVARPredictionNetwork -class DeepVAREstimator(PTSEstimator): +class DeepVAREstimator(PyTorchEstimator): def __init__( self, input_size: int, @@ -199,7 +199,9 @@ class DeepVAREstimator(PTSEstimator): field_name="target_dimension_indicator", target_field=FieldName.TARGET, ), - AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1, dtype=np.long), + AsNumpyArray( + field=FieldName.FEAT_STATIC_CAT, expected_ndim=1, dtype=np.long + ), AsNumpyArray(field=FieldName.FEAT_STATIC_REAL, expected_ndim=1), InstanceSplitter( target_field=FieldName.TARGET, @@ -242,7 +244,7 @@ class DeepVAREstimator(PTSEstimator): transformation: Transformation, trained_network: DeepVARTrainingNetwork, device: torch.device, - ) -> PTSPredictor: + ) -> PyTorchPredictor: prediction_network = DeepVARPredictionNetwork( input_size=self.input_size, target_dim=self.target_dim, @@ -263,7 +265,7 @@ class DeepVAREstimator(PTSEstimator): 
copy_parameters(trained_network, prediction_network) - return PTSPredictor( + return PyTorchPredictor( input_transform=transformation, prediction_net=prediction_network, batch_size=self.trainer.batch_size, diff --git a/pts/model/deepvar/deepvar_network.py b/pts/model/deepvar/deepvar_network.py index 1f403d5..0693f83 100644 --- a/pts/model/deepvar/deepvar_network.py +++ b/pts/model/deepvar/deepvar_network.py @@ -3,7 +3,7 @@ from typing import List, Optional, Tuple, Union import torch import torch.nn as nn -from pts.core.component import validated +from gluonts.core.component import validated from pts.model import weighted_average from pts.modules import DistributionOutput, MeanScaler, NOPScaler, FeatureEmbedder @@ -250,7 +250,10 @@ class DeepVARTrainingNetwork(nn.Module): subsequences_length = self.context_length else: time_feat = torch.cat( - (past_time_feat[:, -self.context_length :, ...], future_time_feat,), + ( + past_time_feat[:, -self.context_length :, ...], + future_time_feat, + ), dim=1, ) sequence = torch.cat((past_target_cdf, future_target_cdf), dim=1) @@ -285,7 +288,9 @@ class DeepVARTrainingNetwork(nn.Module): return outputs, states, scale, lags_scaled, inputs def distr( - self, rnn_outputs: torch.Tensor, scale: torch.Tensor, + self, + rnn_outputs: torch.Tensor, + scale: torch.Tensor, ): """ Returns the distribution of DeepVAR with respect to the RNN outputs. 
@@ -382,7 +387,8 @@ class DeepVARTrainingNetwork(nn.Module): # put together target sequence # (batch_size, seq_len, target_dim) target = torch.cat( - (past_target_cdf[:, -self.context_length :, ...], future_target_cdf), dim=1, + (past_target_cdf[:, -self.context_length :, ...], future_target_cdf), + dim=1, ) # assert_shape(target, (-1, seq_len, self.target_dim)) @@ -507,7 +513,8 @@ class DeepVARPredictionNetwork(DeepVARTrainingNetwork): ) distr, distr_args = self.distr( - rnn_outputs=rnn_outputs, scale=repeated_scale, + rnn_outputs=rnn_outputs, + scale=repeated_scale, ) # (batch_size, 1, target_dim) @@ -524,7 +531,12 @@ class DeepVARPredictionNetwork(DeepVARTrainingNetwork): # (batch_size, num_samples, prediction_length, target_dim) return samples.reshape( - (-1, self.num_parallel_samples, self.prediction_length, self.target_dim,) + ( + -1, + self.num_parallel_samples, + self.prediction_length, + self.target_dim, + ) ) def forward( diff --git a/pts/model/estimator.py b/pts/model/estimator.py index a2d40f1..631b913 100644 --- a/pts/model/estimator.py +++ b/pts/model/estimator.py @@ -1,73 +1,38 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- - -from abc import ABC, abstractmethod -from typing import NamedTuple +from typing import NamedTuple, Optional +from functools import partial import numpy as np + import torch import torch.nn as nn -from torch.utils.data import DataLoader + +from gluonts.core.component import validated +from gluonts.dataset.common import Dataset +from gluonts.dataset.loader import TrainDataLoader, ValidationDataLoader +from gluonts.model.estimator import Estimator +from gluonts.torch.model.predictor import PyTorchPredictor +from gluonts.torch.batchify import batchify +from gluonts.transform import SelectFields, Transformation from pts import Trainer -from pts.dataset import Dataset, TransformedIterableDataset -from pts.transform import Transformation -from .predictor import Predictor -from .utils import get_module_forward_input_names - - -class Estimator(ABC): - prediction_length: int - freq: str - - @abstractmethod - def train(self, training_data: Dataset) -> Predictor: - pass - - -class DummyEstimator(Estimator): - """ - An `Estimator` that, upon training, simply returns a pre-constructed - `Predictor`. - - Parameters - ---------- - predictor_cls - `Predictor` class to instantiate. - **kwargs - Keyword arguments to pass to the predictor constructor. 
- """ - - def __init__(self, predictor_cls: type, **kwargs) -> None: - self.predictor = predictor_cls(**kwargs) - - def train(self, training_data: Dataset) -> Predictor: - return self.predictor +from pts.model import get_module_forward_input_names class TrainOutput(NamedTuple): transformation: Transformation trained_net: nn.Module - predictor: Predictor + predictor: PyTorchPredictor -class PTSEstimator(Estimator): - def __init__(self, trainer: Trainer, dtype: np.dtype = np.float32) -> None: +class PyTorchEstimator(Estimator): + @validated() + def __init__( + self, trainer: Trainer, lead_time: int = 0, dtype: np.dtype = np.float32 + ) -> None: + super().__init__(lead_time=lead_time) self.trainer = trainer self.dtype = dtype - @abstractmethod def create_transformation(self) -> Transformation: """ Create and return the transformation needed for training and inference. @@ -78,9 +43,8 @@ class PTSEstimator(Estimator): The transformation that will be applied entry-wise to datasets, at training and inference time. """ - pass + raise NotImplementedError - @abstractmethod def create_training_network(self, device: torch.device) -> nn.Module: """ Create and return the network used for training (i.e., computing the @@ -91,15 +55,14 @@ class PTSEstimator(Estimator): nn.Module The network that computes the loss given input data. """ - pass + raise NotImplementedError - @abstractmethod def create_predictor( self, transformation: Transformation, trained_network: nn.Module, device: torch.device, - ) -> Predictor: + ) -> PyTorchPredictor: """ Create and return a predictor object. @@ -108,32 +71,56 @@ class PTSEstimator(Estimator): Predictor A predictor wrapping a `nn.Module` used for inference. 
""" - pass + raise NotImplementedError - def train_model(self, training_data: Dataset) -> TrainOutput: + def train_model( + self, + training_data: Dataset, + validation_data: Optional[Dataset] = None, + num_workers: Optional[int] = None, + num_prefetch: Optional[int] = None, + shuffle_buffer_length: Optional[int] = None, + **kwargs, + ) -> TrainOutput: transformation = self.create_transformation() - transformation.estimate(iter(training_data)) - training_iter_dataset = TransformedIterableDataset( - dataset=training_data, - is_train=True, - transform=transformation - ) - - training_data_loader = DataLoader( - training_iter_dataset, - batch_size=self.trainer.batch_size, - num_workers=self.trainer.num_workers, - pin_memory=self.trainer.pin_memory - ) - - # ensure that the training network is created on the same device trained_net = self.create_training_network(self.trainer.device) + input_names = get_module_forward_input_names(trained_net) + + training_data_loader = TrainDataLoader( + dataset=training_data, + transform=transformation + SelectFields(input_names), + batch_size=self.trainer.batch_size, + stack_fn=partial( + batchify, + device=self.trainer.device, + ), + num_workers=num_workers, + num_prefetch=num_prefetch, + shuffle_buffer_length=shuffle_buffer_length, + **kwargs, + ) + + validation_data_loader = None + if validation_data is not None: + validation_data_loader = ValidationDataLoader( + dataset=validation_data, + transform=transformation + SelectFields(input_names), + batch_size=self.trainer.batch_size, + stack_fn=partial( + batchify, + device=self.trainer.device, + ), + num_workers=num_workers, + num_prefetch=num_prefetch, + **kwargs, + ) + self.trainer( net=trained_net, - input_names=get_module_forward_input_names(trained_net), - data_loader=training_data_loader, + train_iter=training_data_loader, + validation_iter=validation_data_loader, ) return TrainOutput( @@ -144,5 +131,20 @@ class PTSEstimator(Estimator): ), ) - def train(self, training_data: 
Dataset) -> Predictor: - return self.train_model(training_data).predictor + def train( + self, + training_data: Dataset, + validation_data: Optional[Dataset] = None, + num_workers: Optional[int] = None, + num_prefetch: Optional[int] = None, + shuffle_buffer_length: Optional[int] = None, + **kwargs, + ) -> PyTorchPredictor: + return self.train_model( + training_data, + validation_data, + num_workers, + num_prefetch, + shuffle_buffer_length, + **kwargs, + ).predictor diff --git a/pts/model/forecast.py b/pts/model/forecast.py deleted file mode 100644 index 847a7e5..0000000 --- a/pts/model/forecast.py +++ /dev/null @@ -1,552 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- - -from abc import ABC, abstractmethod -from enum import Enum -from typing import Dict, List, Optional, Set, Union, Callable - -import numpy as np -import pandas as pd -import torch -from pydantic import BaseModel, Field -from torch.distributions import Distribution - -from .quantile import Quantile - - -class OutputType(str, Enum): - mean = "mean" - samples = "samples" - quantiles = "quantiles" - - -class Config(BaseModel): - num_samples: int = Field(100, alias="num_eval_samples") - output_types: Set[OutputType] = {"quantiles", "mean"} - # FIXME: validate list elements - quantiles: List[str] = ["0.1", "0.5", "0.9"] - - class Config: - allow_population_by_field_name = True - # store additional fields - extra = "allow" - - -class Forecast(ABC): - start_date: pd.Timestamp - freq: str - item_id: Optional[str] - info: Optional[Dict] - prediction_length: int - mean: np.ndarray - _index = None - - @abstractmethod - def quantile(self, q: Union[float, str]) -> np.ndarray: - """ - Computes a quantile from the predicted distribution. - - Parameters - ---------- - q - Quantile to compute. - - Returns - ------- - numpy.ndarray - Value of the quantile across the prediction range. - """ - pass - - def quantile_ts(self, q: Union[float, str]) -> pd.Series: - return pd.Series(data=self.quantile(q), index=self.index) - - @property - def median(self) -> np.ndarray: - return self.quantile(0.5) - - def plot( - self, - prediction_intervals=(50.0, 90.0), - show_mean=False, - color="b", - label=None, - output_file=None, - *args, - **kwargs, - ): - """ - Plots the median of the forecast as well as confidence bounds. - (requires matplotlib and pandas). - - Parameters - ---------- - prediction_intervals : float or list of floats in [0, 100] - Confidence interval size(s). If a list, it will stack the error - plots for each confidence interval. Only relevant for error styles - with "ci" in the name. - show_mean : boolean - Whether to also show the mean of the forecast. 
- color : matplotlib color name or dictionary - The color used for plotting the forecast. - label : string - A label (prefix) that is used for the forecast - output_file : str or None, default None - Output path for the plot file. If None, plot is not saved to file. - args : - Other arguments are passed to main plot() call - kwargs : - Other keyword arguments are passed to main plot() call - """ - - # matplotlib==2.0.* gives errors in Brazil builds and has to be - # imported locally - import matplotlib.pyplot as plt - - label_prefix = "" if label is None else label + "-" - - for c in prediction_intervals: - assert 0.0 <= c <= 100.0 - - ps = [50.0] + [ - 50.0 + f * c / 2.0 for c in prediction_intervals for f in [-1.0, +1.0] - ] - percentiles_sorted = sorted(set(ps)) - - def alpha_for_percentile(p): - return (p / 100.0) ** 0.3 - - ps_data = [self.quantile(p / 100.0) for p in percentiles_sorted] - i_p50 = len(percentiles_sorted) // 2 - - p50_data = ps_data[i_p50] - p50_series = pd.Series(data=p50_data, index=self.index) - p50_series.plot(color=color, ls="-", label=f"{label_prefix}median") - - if show_mean: - mean_data = np.mean(self._sorted_samples, axis=0) - pd.Series(data=mean_data, index=self.index).plot( - color=color, ls=":", label=f"{label_prefix}mean", *args, **kwargs, - ) - - for i in range(len(percentiles_sorted) // 2): - ptile = percentiles_sorted[i] - alpha = alpha_for_percentile(ptile) - plt.fill_between( - self.index, - ps_data[i], - ps_data[-i - 1], - facecolor=color, - alpha=alpha, - interpolate=True, - *args, - **kwargs, - ) - # Hack to create labels for the error intervals. 
- # Doesn't actually plot anything, because we only pass a single data point - pd.Series(data=p50_data[:1], index=self.index[:1]).plot( - color=color, - alpha=alpha, - linewidth=10, - label=f"{label_prefix}{100 - ptile * 2}%", - *args, - **kwargs, - ) - if output_file: - plt.savefig(output_file) - - @property - def index(self) -> pd.DatetimeIndex: - if self._index is None: - self._index = pd.date_range( - self.start_date, periods=self.prediction_length, freq=self.freq - ) - return self._index - - def as_json_dict(self, config: "Config") -> dict: - result = {} - - if OutputType.mean in config.output_types: - result["mean"] = self.mean.tolist() - - if OutputType.quantiles in config.output_types: - quantiles = map(Quantile.parse, config.quantiles) - - result["quantiles"] = { - quantile.name: self.quantile(quantile.value).tolist() - for quantile in quantiles - } - - if OutputType.samples in config.output_types: - result["samples"] = [] - - return result - - -class SampleForecast(Forecast): - """ - A `Forecast` object, where the predicted distribution is represented - internally as samples. - - Parameters - ---------- - samples - Array of size (num_samples, prediction_length) - start_date - start of the forecast - freq - forecast frequency - info - additional information that the forecaster may provide e.g. estimated - parameters, number of iterations ran etc. - """ - - def __init__( - self, - samples: Union[torch.Tensor, np.ndarray], - start_date: pd.Timestamp, - freq: str, - item_id: Optional[str] = None, - info: Optional[Dict] = None, - ) -> None: - assert isinstance( - samples, (np.ndarray, torch.Tensor) - ), "samples should be either a numpy array or an torch tensor" - assert ( - len(np.shape(samples)) == 2 or len(np.shape(samples)) == 3 - ), "samples should be a 2-dimensional or 3-dimensional array. 
Dimensions found: {}".format( - len(np.shape(samples)) - ) - self.samples = ( - samples if (isinstance(samples, np.ndarray)) else samples.cpu().numpy() - ) - self._sorted_samples_value = None - self._mean = None - self._dim = None - self.item_id = item_id - self.info = info - - assert isinstance( - start_date, pd.Timestamp - ), "start_date should be a pandas Timestamp object" - self.start_date = start_date - - assert isinstance(freq, str), "freq should be a string" - self.freq = freq - - @property - def _sorted_samples(self): - if self._sorted_samples_value is None: - self._sorted_samples_value = np.sort(self.samples, axis=0) - return self._sorted_samples_value - - @property - def num_samples(self): - """ - The number of samples representing the forecast. - """ - return self.samples.shape[0] - - @property - def prediction_length(self): - """ - Time length of the forecast. - """ - return self.samples.shape[1] - - @property - def mean(self) -> np.ndarray: - """ - Forecast mean. - """ - if self._mean is not None: - return self._mean - else: - return np.mean(self.samples, axis=0) - - @property - def mean_ts(self) -> pd.Series: - """ - Forecast mean, as a pandas.Series object. - """ - return pd.Series(data=self.mean, index=self.index) - - def quantile(self, q: Union[float, str]) -> np.ndarray: - q = Quantile.parse(q).value - sample_idx = int(np.round((self.num_samples - 1) * q)) - return self._sorted_samples[sample_idx, :] - - def copy_dim(self, dim: int) -> "SampleForecast": - """ - Returns a new Forecast object with only the selected sub-dimension. - - Parameters - ---------- - dim - The returned forecast object will only represent this dimension. 
- """ - if len(self.samples.shape) == 2: - samples = self.samples - else: - target_dim = self.samples.shape[2] - assert dim < target_dim, ( - f"must set 0 <= dim < target_dim, but got dim={dim}," - f" target_dim={target_dim}" - ) - samples = self.samples[:, :, dim] - - return SampleForecast( - samples=samples, - start_date=self.start_date, - freq=self.freq, - item_id=self.item_id, - info=self.info, - ) - - def copy_aggregate(self, agg_fun: Callable) -> "SampleForecast": - """ - Returns a new Forecast object with a time series aggregated over the - dimension axis. - - Parameters - ---------- - agg_fun - Aggregation function that defines the aggregation operation - (typically mean or sum). - """ - if len(self.samples.shape) == 2: - samples = self.samples - else: - # Aggregate over target dimension axis - samples = agg_fun(self.samples, axis=2) - return SampleForecast( - samples=samples, - start_date=self.start_date, - freq=self.freq, - item_id=self.item_id, - info=self.info, - ) - - def dim(self) -> int: - """ - Returns the dimensionality of the forecast object. - """ - if self._dim is not None: - return self._dim - else: - if len(self.samples.shape) == 2: - # univariate target - # shape: (num_samples, prediction_length) - return 1 - else: - # multivariate target - # shape: (num_samples, prediction_length, target_dim) - return self.samples.shape[2] - - def as_json_dict(self, config: "Config") -> dict: - result = super().as_json_dict(config) - - if OutputType.samples in config.output_types: - result["samples"] = self.samples.tolist() - - return result - - def __repr__(self): - return ", ".join( - [ - f"SampleForecast({self.samples!r})", - f"{self.start_date!r}", - f"{self.freq!r}", - f"item_id={self.item_id!r}", - f"info={self.info!r})", - ] - ) - - -class QuantileForecast(Forecast): - """ - A Forecast that contains arrays (i.e. 
time series) for quantiles and mean - - Parameters - ---------- - forecast_arrays - An array of forecasts - start_date - start of the forecast - freq - forecast frequency - forecast_keys - A list of quantiles of the form '0.1', '0.9', etc., - and potentially 'mean'. Each entry corresponds to one array in - forecast_arrays. - info - additional information that the forecaster may provide e.g. estimated - parameters, number of iterations ran etc. - """ - - def __init__( - self, - forecast_arrays: np.ndarray, - start_date: pd.Timestamp, - freq: str, - forecast_keys: List[str], - item_id: Optional[str] = None, - info: Optional[Dict] = None, - ) -> None: - self.forecast_array = forecast_arrays - self.start_date = pd.Timestamp(start_date, freq=freq) - self.freq = freq - - # normalize keys - self.forecast_keys = [ - Quantile.from_str(key).name if key != "mean" else key - for key in forecast_keys - ] - self.item_id = item_id - self.info = info - self._dim = None - - shape = self.forecast_array.shape - assert shape[0] == len(self.forecast_keys), ( - f"The forecast_array (shape={shape} should have the same " - f"length as the forecast_keys (len={len(self.forecast_keys)})." - ) - self.prediction_length = shape[-1] - self._forecast_dict = { - k: self.forecast_array[i] for i, k in enumerate(self.forecast_keys) - } - - self._nan_out = np.array([np.nan] * self.prediction_length) - - def quantile(self, q: Union[float, str]) -> np.ndarray: - q_str = Quantile.parse(q).name - # We return nan here such that evaluation runs through - return self._forecast_dict.get(q_str, self._nan_out) - - @property - def mean(self) -> np.ndarray: - """ - Forecast mean. - """ - return self._forecast_dict.get("mean", self._nan_out) - - def dim(self) -> int: - """ - Returns the dimensionality of the forecast object. - """ - if self._dim is not None: - return self._dim - else: - if ( - len(self.forecast_array.shape) == 2 - ): # 1D target. 
shape: (num_samples, prediction_length) - return 1 - else: - return self.forecast_array.shape[ - 1 - ] # 2D target. shape: (num_samples, target_dim, prediction_length) - - def __repr__(self): - return ", ".join( - [ - f"QuantileForecast({self.forecast_array!r})", - f"start_date={self.start_date!r}", - f"freq={self.freq!r}", - f"forecast_keys={self.forecast_keys!r}", - f"item_id={self.item_id!r}", - f"info={self.info!r})", - ] - ) - - -class DistributionForecast(Forecast): - """ - A `Forecast` object that uses a distribution directly. - This can for instance be used to represent marginal probability - distributions for each time point -- although joint distributions are - also possible, e.g. when using MultiVariateGaussian). - - Parameters - ---------- - distribution - Distribution object. This should represent the entire prediction - length, i.e., if we draw `num_samples` samples from the distribution, - the sample shape should be - - samples = trans_dist.sample(num_samples) - samples.shape -> (num_samples, prediction_length) - - start_date - start of the forecast - freq - forecast frequency - info - additional information that the forecaster may provide e.g. estimated - parameters, number of iterations ran etc. - """ - - def __init__( - self, - distribution: Distribution, - start_date: pd.Timestamp, - freq: str, - item_id: Optional[str] = None, - info: Optional[Dict] = None, - ) -> None: - self.distribution = distribution - self.shape = self.distribution.batch_shape + self.distribution.event_shape - self.prediction_length = self.shape[0] - self.item_id = item_id - self.info = info - - assert isinstance( - start_date, pd.Timestamp - ), "start_date should be a pandas Timestamp object" - self.start_date = start_date - - assert isinstance(freq, str), "freq should be a string" - self.freq = freq - self._mean = None - - @property - def mean(self) -> np.ndarray: - """ - Forecast mean. 
- """ - if self._mean is not None: - return self._mean - else: - self._mean = self.distribution.mean.cpu().numpy() - return self._mean - - @property - def mean_ts(self) -> pd.Series: - """ - Forecast mean, as a pandas.Series object. - """ - return pd.Series(data=self.mean, index=self.index) - - def quantile(self, level: Union[float, str]) -> np.ndarray: - level = Quantile.parse(level).value - q = self.distribution.icdf(torch.tensor([level])).cpu().numpy() - return q - - def to_sample_forecast(self, num_samples: int = 200) -> SampleForecast: - return SampleForecast( - samples=self.distribution.sample((num_samples,)), - start_date=self.start_date, - freq=self.freq, - item_id=self.item_id, - info=self.info, - ) diff --git a/pts/model/forecast_generator.py b/pts/model/forecast_generator.py deleted file mode 100644 index b842615..0000000 --- a/pts/model/forecast_generator.py +++ /dev/null @@ -1,195 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- - -from abc import ABC, abstractmethod -from typing import Any, Callable, Iterator, List, Optional - -import numpy as np -import torch -import torch.nn as nn - -from pts.core.component import validated -from pts.dataset import InferenceDataLoader, DataEntry, FieldName -from pts.modules import DistributionOutput -from .forecast import Forecast, DistributionForecast, QuantileForecast, SampleForecast - -OutputTransform = Callable[[DataEntry, np.ndarray], np.ndarray] - - -def _extract_instances(x: Any) -> Any: - """ - Helper function to extract individual instances from batched - mxnet results. - - For a tensor `a` - _extract_instances(a) -> [a[0], a[1], ...] - - For (nested) tuples of tensors `(a, (b, c))` - _extract_instances((a, (b, c)) -> [(a[0], (b[0], c[0])), (a[1], (b[1], c[1])), ...] - """ - if isinstance(x, (np.ndarray, torch.Tensor)): - for i in range(x.shape[0]): - # yield x[i: i + 1] - yield x[i] - elif isinstance(x, tuple): - for m in zip(*[_extract_instances(y) for y in x]): - yield tuple([r for r in m]) - elif isinstance(x, list): - for m in zip(*[_extract_instances(y) for y in x]): - yield [r for r in m] - elif x is None: - while True: - yield None - else: - assert False - - -class ForecastGenerator(ABC): - """ - Classes used to bring the output of a network into a class. 
- """ - - @abstractmethod - def __call__( - self, - inference_data_loader: InferenceDataLoader, - prediction_net: nn.Module, - input_names: List[str], - freq: str, - output_transform: Optional[OutputTransform], - num_samples: Optional[int], - **kwargs - ) -> Iterator[Forecast]: - pass - - -class DistributionForecastGenerator(ForecastGenerator): - def __init__(self, distr_output: DistributionOutput) -> None: - self.distr_output = distr_output - - def __call__( - self, - inference_data_loader: InferenceDataLoader, - prediction_net: nn.Module, - input_names: List[str], - freq: str, - output_transform: Optional[OutputTransform], - num_samples: Optional[int], - **kwargs - ) -> Iterator[DistributionForecast]: - for batch in inference_data_loader: - inputs = [batch[k] for k in input_names] - outputs = prediction_net(*inputs) - if output_transform is not None: - outputs = output_transform(batch, outputs) - - distributions = [ - self.distr_output.distribution(*u) for u in _extract_instances(outputs) - ] - - i = -1 - for i, distr in enumerate(distributions): - yield DistributionForecast( - distr, - start_date=batch["forecast_start"][i], - freq=freq, - item_id=batch[FieldName.ITEM_ID][i] - if FieldName.ITEM_ID in batch - else None, - info=batch["info"][i] if "info" in batch else None, - ) - assert i + 1 == len(batch["forecast_start"]) - - -class QuantileForecastGenerator(ForecastGenerator): - def __init__(self, quantiles: List[str]) -> None: - self.quantiles = quantiles - - def __call__( - self, - inference_data_loader: InferenceDataLoader, - prediction_net: nn.Module, - input_names: List[str], - freq: str, - output_transform: Optional[OutputTransform], - num_samples: Optional[int], - **kwargs - ) -> Iterator[Forecast]: - for batch in inference_data_loader: - inputs = [batch[k] for k in input_names] - outputs = prediction_net(*inputs).cpu().numpy() - if output_transform is not None: - outputs = output_transform(batch, outputs) - - i = -1 - for i, output in enumerate(outputs): 
- yield QuantileForecast( - output, - start_date=batch["forecast_start"][i], - freq=freq, - item_id=batch[FieldName.ITEM_ID][i] - if FieldName.ITEM_ID in batch - else None, - info=batch["info"][i] if "info" in batch else None, - forecast_keys=self.quantiles, - ) - assert i + 1 == len(batch["forecast_start"]) - - -class SampleForecastGenerator(ForecastGenerator): - - @validated() - def __init__(self): - pass - - def __call__( - self, - inference_data_loader: InferenceDataLoader, - prediction_net: nn.Module, - input_names: List[str], - freq: str, - output_transform: Optional[OutputTransform], - num_samples: Optional[int], - **kwargs - ) -> Iterator[Forecast]: - for batch in inference_data_loader: - inputs = [batch[k] for k in input_names] - outputs = prediction_net(*inputs).cpu().numpy() - if output_transform is not None: - outputs = output_transform(batch, outputs) - if num_samples: - num_collected_samples = outputs[0].shape[0] - collected_samples = [outputs] - while num_collected_samples < num_samples: - outputs = prediction_net(*inputs).cpu().numpy() - if output_transform is not None: - outputs = output_transform(batch, outputs) - collected_samples.append(outputs) - num_collected_samples += outputs[0].shape[0] - outputs = [ - np.concatenate(s)[:num_samples] for s in zip(*collected_samples) - ] - assert len(outputs[0]) == num_samples - i = -1 - for i, output in enumerate(outputs): - yield SampleForecast( - output, - start_date=batch["forecast_start"][i], - freq=freq, - item_id=batch[FieldName.ITEM_ID][i] - if FieldName.ITEM_ID in batch - else None, - info=batch["info"][i] if "info" in batch else None, - ) - assert i + 1 == len(batch["forecast_start"]) diff --git a/pts/model/lstnet/lstnet_estimator.py b/pts/model/lstnet/lstnet_estimator.py index 3b44865..11a2fb7 100644 --- a/pts/model/lstnet/lstnet_estimator.py +++ b/pts/model/lstnet/lstnet_estimator.py @@ -6,7 +6,7 @@ import torch.nn as nn from pts import Trainer from pts.dataset import FieldName -from pts.model 
import PTSEstimator, Predictor, PTSPredictor, copy_parameters +from pts.model import PyTorchEstimator, Predictor, PyTorchPredictor, copy_parameters from pts.transform import ( InstanceSplitter, Transformation, @@ -19,7 +19,7 @@ from pts.transform import ( from .lstnet_network import LSTNetTrain, LSTNetPredict -class LSTNetEstimator(PTSEstimator): +class LSTNetEstimator(PyTorchEstimator): def __init__( self, freq: str, @@ -110,7 +110,7 @@ class LSTNetEstimator(PTSEstimator): transformation: Transformation, trained_network: LSTNetTrain, device: torch.device, - ) -> PTSPredictor: + ) -> PyTorchPredictor: prediction_network = LSTNetPredict( num_series=self.num_series, channels=self.channels, @@ -131,7 +131,7 @@ class LSTNetEstimator(PTSEstimator): copy_parameters(trained_network, prediction_network) - return PTSPredictor( + return PyTorchPredictor( input_transform=transformation, prediction_net=prediction_network, batch_size=self.trainer.batch_size, diff --git a/pts/model/lstnet/lstnet_network.py b/pts/model/lstnet/lstnet_network.py index 8f5d160..e50e500 100644 --- a/pts/model/lstnet/lstnet_network.py +++ b/pts/model/lstnet/lstnet_network.py @@ -110,7 +110,7 @@ class LSTNetBase(nn.Module): ) -> torch.Tensor: scaled_past_target, scale = self.scaler( past_target[..., -self.context_length :], # [B, C, T] - past_observed_values[..., -self.context_length :] # [B, C, T] + past_observed_values[..., -self.context_length :], # [B, C, T] ) # CNN @@ -121,7 +121,7 @@ class LSTNetBase(nn.Module): # RNN r = c.permute(2, 0, 1) # [F (T), B, C] _, r = self.rnn(r) # [1, B, H] - r = self.dropout(r.squeeze(0)) # [B, H] + r = self.dropout(r.squeeze(0)) # [B, H] # Skip-RNN skip_c = c[..., -self.conv_skip * self.skip_size :] @@ -174,7 +174,7 @@ class LSTNetTrain(LSTNetBase): if self.horizon: future_target = future_target[..., -1:] - loss = self.loss_fn(ret*scale, future_target) + loss = self.loss_fn(ret * scale, future_target) return loss @@ -183,6 +183,6 @@ class LSTNetPredict(LSTNetBase): 
self, past_target: torch.Tensor, past_observed_values: torch.Tensor ) -> torch.Tensor: ret, scale = super().forward(past_target, past_observed_values) - ret = (ret*scale).permute(0, 2, 1) + ret = (ret * scale).permute(0, 2, 1) return ret.unsqueeze(1) diff --git a/pts/model/n_beats/n_beats_ensemble.py b/pts/model/n_beats/n_beats_ensemble.py index 861519c..3c0b86b 100644 --- a/pts/model/n_beats/n_beats_ensemble.py +++ b/pts/model/n_beats/n_beats_ensemble.py @@ -164,6 +164,7 @@ class NBEATSEnsembleEstimator(Estimator): **kwargs Arguments passed down to the individual estimators. """ + def __init__( self, freq: str, diff --git a/pts/model/n_beats/n_beats_estimator.py b/pts/model/n_beats/n_beats_estimator.py index 38adb0c..272e69b 100644 --- a/pts/model/n_beats/n_beats_estimator.py +++ b/pts/model/n_beats/n_beats_estimator.py @@ -5,7 +5,7 @@ import torch.nn as nn from pts import Trainer from pts.dataset import FieldName -from pts.model import PTSEstimator, Predictor, PTSPredictor, copy_parameters +from pts.model import PyTorchEstimator, Predictor, PyTorchPredictor, copy_parameters from pts.transform import ( InstanceSplitter, Transformation, @@ -20,7 +20,7 @@ from .n_beats_network import ( ) -class NBEATSEstimator(PTSEstimator): +class NBEATSEstimator(PyTorchEstimator): def __init__( self, freq: str, @@ -124,10 +124,14 @@ class NBEATSEstimator(PTSEstimator): # conditioning part and a to-predict part, for each training example. 
def create_transformation(self) -> Transformation: return Chain( - [ RemoveFields( - field_names=[FieldName.FEAT_STATIC_REAL, - FieldName.FEAT_DYNAMIC_REAL, - FieldName.FEAT_DYNAMIC_CAT]), + [ + RemoveFields( + field_names=[ + FieldName.FEAT_STATIC_REAL, + FieldName.FEAT_DYNAMIC_REAL, + FieldName.FEAT_DYNAMIC_CAT, + ] + ), InstanceSplitter( target_field=FieldName.TARGET, is_pad_field=FieldName.IS_PAD, @@ -137,11 +141,11 @@ class NBEATSEstimator(PTSEstimator): past_length=self.context_length, future_length=self.prediction_length, time_series_fields=[], - ) + ), ] ) - - def create_training_network(self, device: torch.device) -> NBEATSTrainingNetwork: + + def create_training_network(self, device: torch.device) -> NBEATSTrainingNetwork: return NBEATSTrainingNetwork( prediction_length=self.prediction_length, context_length=self.context_length, @@ -156,10 +160,9 @@ class NBEATSEstimator(PTSEstimator): freq=self.freq, ).to(device) - def create_predictor( - self, - transformation: Transformation, + self, + transformation: Transformation, trained_network: nn.Module, device: torch.device, ) -> Predictor: @@ -172,12 +175,12 @@ class NBEATSEstimator(PTSEstimator): num_block_layers=self.num_block_layers, expansion_coefficient_lengths=self.expansion_coefficient_lengths, sharing=self.sharing, - stack_types=self.stack_types + stack_types=self.stack_types, ).to(device) copy_parameters(trained_network, prediction_network) - return PTSPredictor( + return PyTorchPredictor( input_transform=transformation, prediction_net=prediction_network, batch_size=self.trainer.batch_size, diff --git a/pts/model/n_beats/n_beats_network.py b/pts/model/n_beats/n_beats_network.py index af5fcea..3d3aae9 100644 --- a/pts/model/n_beats/n_beats_network.py +++ b/pts/model/n_beats/n_beats_network.py @@ -258,7 +258,8 @@ class NBEATSNetwork(nn.Module): flag = denominator == 0 return (200 / self.prediction_length) * torch.mean( - (torch.abs(future_target - forecast) * torch.logical_not(flag)) / (denominator + 
flag), + (torch.abs(future_target - forecast) * torch.logical_not(flag)) + / (denominator + flag), dim=1, ) @@ -269,7 +270,8 @@ class NBEATSNetwork(nn.Module): flag = denominator == 0 return (100 / self.prediction_length) * torch.mean( - (torch.abs(future_target - forecast) * torch.logical_not(flag)) / (denominator + flag), + (torch.abs(future_target - forecast) * torch.logical_not(flag)) + / (denominator + flag), dim=1, ) @@ -292,9 +294,10 @@ class NBEATSNetwork(nn.Module): ) flag = seasonal_error == 0 - return (torch.mean(torch.abs(future_target - forecast), dim=1) * torch.logical_not(flag)) / ( - seasonal_error + flag - ) + return ( + torch.mean(torch.abs(future_target - forecast), dim=1) + * torch.logical_not(flag) + ) / (seasonal_error + flag) class NBEATSTrainingNetwork(NBEATSNetwork): @@ -342,4 +345,3 @@ class NBEATSPredictionNetwork(NBEATSNetwork): forecasts = super().forward(past_target=past_target) return forecasts.unsqueeze(1) - diff --git a/pts/model/predictor.py b/pts/model/predictor.py deleted file mode 100644 index a232705..0000000 --- a/pts/model/predictor.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- - -import json -from abc import ABC, abstractmethod -from pathlib import Path -from pydoc import locate -from typing import Iterator, Callable, Optional - -import numpy as np -import torch -import torch.nn as nn - -import pts -from pts.core.serde import dump_json, fqname_for, load_json -from pts.dataset import Dataset, DataEntry, InferenceDataLoader -from pts.transform import Transformation -from .forecast import Forecast -from .forecast_generator import ForecastGenerator, SampleForecastGenerator -from .utils import get_module_forward_input_names - -OutputTransform = Callable[[DataEntry, np.ndarray], np.ndarray] - - -class Predictor(ABC): - - __version__: str = pts.__version__ - - def __init__(self, prediction_length: int, freq: str) -> None: - self.prediction_length = prediction_length - self.freq = freq - - @abstractmethod - def predict(self, dataset: Dataset, **kwargs) -> Iterator[Forecast]: - pass - - def serialize(self, path: Path) -> None: - # serialize Predictor type - with (path / "type.txt").open("w") as fp: - fp.write(fqname_for(self.__class__)) - with (path / "version.json").open("w") as fp: - json.dump( - {"model": self.__version__, "pts": pts.__version__}, fp - ) - - @classmethod - def deserialize( - cls, path: Path, device: Optional[torch.device] = None - ) -> "Predictor": - """ - Load a serialized predictor from the given path - Parameters - ---------- - path - Path to the serialized files predictor. - device - Optional pytorch to be used with the predictor. - If nothing is passed will use the GPU if available and CPU otherwise. 
- """ - # deserialize Predictor type - with (path / "type.txt").open("r") as fp: - tpe = locate(fp.readline()) - - # ensure that predictor_cls is a subtype of Predictor - if not issubclass(tpe, Predictor): - raise IOError( - f"Class {fqname_for(tpe)} is not " - f"a subclass of {fqname_for(Predictor)}" - ) - # call deserialize() for the concrete Predictor type - return tpe.deserialize(path, device) - - -class PTSPredictor(Predictor): - def __init__( - self, - prediction_net: nn.Module, - batch_size: int, - prediction_length: int, - freq: str, - device: torch.device, - input_transform: Transformation, - forecast_generator: ForecastGenerator = SampleForecastGenerator(), - output_transform: Optional[OutputTransform] = None, - dtype: np.dtype = np.float32, - ) -> None: - super().__init__(prediction_length, freq) - self.input_names = get_module_forward_input_names(prediction_net) - self.prediction_net = prediction_net - self.batch_size = batch_size - self.input_transform = input_transform - self.forecast_generator = forecast_generator - self.output_transform = output_transform - self.device = device - self.dtype = dtype - - def predict( - self, dataset: Dataset, num_samples: Optional[int] = None - ) -> Iterator[Forecast]: - inference_data_loader = InferenceDataLoader( - dataset, - self.input_transform, - self.batch_size, - device=self.device, - dtype=self.dtype, - ) - - self.prediction_net.eval() - - with torch.no_grad(): - yield from self.forecast_generator( - inference_data_loader=inference_data_loader, - prediction_net=self.prediction_net, - input_names=self.input_names, - freq=self.freq, - output_transform=self.output_transform, - num_samples=num_samples, - ) - - def serialize(self, path: Path) -> None: - - super().serialize(path) - - # serialize network - model_name = 'prediction_net' - with (path / f"{model_name}-network.json").open("w") as fp: - print(dump_json(self.prediction_net), file=fp) - torch.save(self.prediction_net.state_dict(), path / "prediction_net") - 
- # serialize input transformation chain - with (path / "input_transform.json").open("w") as fp: - print(dump_json(self.input_transform), file=fp) - - # serialize output transformation chain - with (path / "output_transform.json").open("w") as fp: - print(dump_json(self.output_transform), file=fp) - - # serialize all remaining constructor parameters - with (path / "parameters.json").open("w") as fp: - parameters = dict( - batch_size=self.batch_size, - prediction_length=self.prediction_length, - freq=self.freq, - dtype=self.dtype, - forecast_generator=self.forecast_generator, - input_names=self.input_names, - ) - print(dump_json(parameters), file=fp) - - @classmethod - def deserialize( - cls, path: Path, device: Optional[torch.device] = None - ) -> "PTSPredictor": - - # deserialize constructor parameters - with (path / "parameters.json").open("r") as fp: - parameters = load_json(fp.read()) - - # deserialize transformation chain - with (path / "input_transform.json").open("r") as fp: - transformation = load_json(fp.read()) - - # deserialize prediction network - model_name = 'prediction_net' - with (path / f"{model_name}-network.json").open("r") as fp: - prediction_net = load_json(fp.read()) - prediction_net.load_state_dict(torch.load(path / "prediction_net")) - - # input_names is derived from the prediction_net - if "input_names" in parameters: - del parameters["input_names"] - - parameters["device"] = device - - return PTSPredictor( - input_transform=transformation, - prediction_net=prediction_net, - **parameters - ) diff --git a/pts/model/quantile.py b/pts/model/quantile.py deleted file mode 100644 index 62265cf..0000000 --- a/pts/model/quantile.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. 
-# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - - -import re -from typing import NamedTuple, Union - - -class Quantile(NamedTuple): - value: float - name: str - - @property - def loss_name(self): - return f"QuantileLoss[{self.name}]" - - @property - def weighted_loss_name(self): - return f"wQuantileLoss[{self.name}]" - - @property - def coverage_name(self): - return f"Coverage[{self.name}]" - - @classmethod - def checked(cls, value: float, name: str) -> "Quantile": - if not 0 <= value <= 1: - raise Exception(f"quantile value should be in [0, 1] but found {value}") - - return Quantile(value, name) - - @classmethod - def from_float(cls, quantile: float) -> "Quantile": - assert isinstance(quantile, float) - return cls.checked(value=quantile, name=str(quantile)) - - @classmethod - def from_str(cls, quantile: str) -> "Quantile": - assert isinstance(quantile, str) - try: - return cls.checked(value=float(quantile), name=quantile) - except ValueError: - m = re.match(r"^p(\d{2})$", quantile) - - if m is None: - raise Exception( - "Quantile string should be of the form " - f'"p10", "p50", ... or "0.1", "0.5", ... but found {quantile}' - ) - else: - quantile_float: float = int(m.group(1)) / 100 - return cls(value=quantile_float, name=str(quantile_float)) - - @classmethod - def parse(cls, quantile: Union["Quantile", float, str]) -> "Quantile": - """Produces equivalent float and string representation of a given - quantile level. 
- - >>> Quantile.parse(0.1) - Quantile(value=0.1, name='0.1') - - >>> Quantile.parse('0.2') - Quantile(value=0.2, name='0.2') - - >>> Quantile.parse('0.20') - Quantile(value=0.2, name='0.20') - - >>> Quantile.parse('p99') - Quantile(value=0.99, name='0.99') - - Parameters - ---------- - quantile - Quantile, can be a float a str representing a float e.g. '0.1' or a - quantile string of the form 'p0.1'. - - Returns - ------- - Quantile - A tuple containing both a float and a string representation of the - input quantile level. - """ - if isinstance(quantile, Quantile): - return quantile - elif isinstance(quantile, float): - return cls.from_float(quantile) - else: - return cls.from_str(quantile) diff --git a/pts/model/simple_feedforward/__init__.py b/pts/model/simple_feedforward/__init__.py index 27dc9a9..5174ac4 100644 --- a/pts/model/simple_feedforward/__init__.py +++ b/pts/model/simple_feedforward/__init__.py @@ -3,4 +3,3 @@ from .simple_feedforward_network import ( SimpleFeedForwardTrainingNetwork, SimpleFeedForwardPredictionNetwork, ) - diff --git a/pts/model/simple_feedforward/simple_feedforward_estimator.py b/pts/model/simple_feedforward/simple_feedforward_estimator.py index 722f338..dd1a053 100644 --- a/pts/model/simple_feedforward/simple_feedforward_estimator.py +++ b/pts/model/simple_feedforward/simple_feedforward_estimator.py @@ -3,29 +3,39 @@ from typing import List, Optional import torch import torch.nn as nn -from pts import Trainer -from pts.dataset import FieldName -from pts.model import PTSEstimator, PTSPredictor, copy_parameters -from pts.modules import DistributionOutput, StudentTOutput -from pts.transform import ( +from gluonts.torch.support.util import copy_parameters +from gluonts.torch.model.predictor import PyTorchPredictor +from gluonts.torch.modules.distribution_output import DistributionOutput +from gluonts.model.predictor import Predictor +from gluonts.dataset.field_names import FieldName +from gluonts.time_feature import ( + TimeFeature, + 
get_lags_for_frequency, + time_features_from_frequency_str, +) +from gluonts.transform import ( Transformation, Chain, InstanceSplitter, ExpectedNumInstanceSampler, ) +from pts import Trainer +from pts.model import PyTorchEstimator +from pts.modules import StudentTOutput + from .simple_feedforward_network import ( SimpleFeedForwardTrainingNetwork, SimpleFeedForwardPredictionNetwork, ) -class SimpleFeedForwardEstimator(PTSEstimator): +class SimpleFeedForwardEstimator(PyTorchEstimator): """ SimpleFeedForwardEstimator shows how to build a simple MLP model predicting the next target time-steps given the previous ones. Given that we want to define a pytorch model trainable by SGD, we inherit the - parent class `PTSEstimator` that handles most of the logic for fitting a + parent class `PyTorchEstimator` that handles most of the logic for fitting a neural-network. We thus only have to define: @@ -148,7 +158,7 @@ class SimpleFeedForwardEstimator(PTSEstimator): transformation: Transformation, trained_network: nn.Module, device: torch.device, - ) -> PTSPredictor: + ) -> PyTorchPredictor: prediction_network = SimpleFeedForwardPredictionNetwork( num_hidden_dimensions=self.num_hidden_dimensions, prediction_length=self.prediction_length, @@ -161,7 +171,7 @@ class SimpleFeedForwardEstimator(PTSEstimator): copy_parameters(trained_network, prediction_network) - return PTSPredictor( + return PyTorchPredictor( input_transform=transformation, prediction_net=prediction_network, batch_size=self.trainer.batch_size, diff --git a/pts/model/simple_feedforward/simple_feedforward_network.py b/pts/model/simple_feedforward/simple_feedforward_network.py index 4fe418d..5863e37 100644 --- a/pts/model/simple_feedforward/simple_feedforward_network.py +++ b/pts/model/simple_feedforward/simple_feedforward_network.py @@ -4,8 +4,10 @@ import torch import torch.nn as nn from torch.distributions import Distribution -from pts.core.component import validated -from pts.modules import MeanScaler, NOPScaler, 
DistributionOutput, LambdaLayer +from gluonts.core.component import validated +from gluonts.torch.modules.distribution_output import DistributionOutput +from gluonts.torch.modules.lambda_layer import LambdaLayer +from pts.modules import MeanScaler, NOPScaler class SimpleFeedForwardNetworkBase(nn.Module): @@ -35,6 +37,7 @@ class SimpleFeedForwardNetworkBase(nn.Module): Distribution to fit. kwargs """ + @validated() def __init__( self, @@ -60,7 +63,7 @@ class SimpleFeedForwardNetworkBase(nn.Module): if i == 0: input_size = context_length else: - input_size = dims[i-1] + input_size = dims[i - 1] modules += [nn.Linear(input_size, units), nn.ReLU()] if self.batch_normalization: modules.append(nn.BatchNorm1d(units)) @@ -83,7 +86,7 @@ class SimpleFeedForwardNetworkBase(nn.Module): past_target, torch.ones_like(past_target), # TODO: pass the actual observed here ) - + mlp_outputs = self.mlp(scaled_target) distr_args = self.distr_args_proj(mlp_outputs) return self.distr_output.distribution( diff --git a/pts/model/tempflow/tempflow_estimator.py b/pts/model/tempflow/tempflow_estimator.py index 7429770..0e6e4e7 100644 --- a/pts/model/tempflow/tempflow_estimator.py +++ b/pts/model/tempflow/tempflow_estimator.py @@ -9,7 +9,7 @@ from pts.feature import ( fourier_time_features_from_frequency_str, get_fourier_lags_for_frequency, ) -from pts.model import PTSEstimator, PTSPredictor, copy_parameters +from pts.model import PyTorchEstimator, PyTorchPredictor, copy_parameters from pts.transform import ( Transformation, Chain, @@ -27,7 +27,7 @@ from pts.transform import ( from .tempflow_network import TempFlowTrainingNetwork, TempFlowPredictionNetwork -class TempFlowEstimator(PTSEstimator): +class TempFlowEstimator(PyTorchEstimator): def __init__( self, input_size: int, @@ -49,7 +49,6 @@ class TempFlowEstimator(PTSEstimator): n_hidden=2, conditioning_length: int = 200, dequantize: bool = False, - scaling: bool = True, pick_incomplete: bool = False, lags_seq: Optional[List[int]] = None, @@ 
-100,10 +99,16 @@ class TempFlowEstimator(PTSEstimator): def create_transformation(self) -> Transformation: return Chain( [ - AsNumpyArray(field=FieldName.TARGET, expected_ndim=2,), + AsNumpyArray( + field=FieldName.TARGET, + expected_ndim=2, + ), # maps the target to (1, T) # if the target data is uni dimensional - ExpandDimArray(field=FieldName.TARGET, axis=None,), + ExpandDimArray( + field=FieldName.TARGET, + axis=None, + ), AddObservedValuesIndicator( target_field=FieldName.TARGET, output_field=FieldName.OBSERVED_VALUES, @@ -176,7 +181,7 @@ class TempFlowEstimator(PTSEstimator): transformation: Transformation, trained_network: TempFlowTrainingNetwork, device: torch.device, - ) -> PTSPredictor: + ) -> PyTorchPredictor: prediction_network = TempFlowPredictionNetwork( input_size=self.input_size, target_dim=self.target_dim, @@ -202,7 +207,7 @@ class TempFlowEstimator(PTSEstimator): copy_parameters(trained_network, prediction_network) - return PTSPredictor( + return PyTorchPredictor( input_transform=transformation, prediction_net=prediction_network, batch_size=self.trainer.batch_size, diff --git a/pts/model/tempflow/tempflow_network.py b/pts/model/tempflow/tempflow_network.py index e4ee471..f954dfa 100644 --- a/pts/model/tempflow/tempflow_network.py +++ b/pts/model/tempflow/tempflow_network.py @@ -3,13 +3,12 @@ from typing import List, Optional, Tuple, Union import torch import torch.nn as nn -from pts.core.component import validated +from gluonts.core.component import validated from pts.model import weighted_average from pts.modules import RealNVP, MAF, FlowOutput, MeanScaler, NOPScaler class TempFlowTrainingNetwork(nn.Module): - @validated() def __init__( self, @@ -55,7 +54,10 @@ class TempFlowTrainingNetwork(nn.Module): batch_first=True, ) - flow_cls = {"RealNVP": RealNVP, "MAF": MAF,}[flow_type] + flow_cls = { + "RealNVP": RealNVP, + "MAF": MAF, + }[flow_type] self.flow = flow_cls( input_size=target_dim, n_blocks=n_blocks, @@ -377,7 +379,8 @@ class 
TempFlowTrainingNetwork(nn.Module): # put together target sequence # (batch_size, seq_len, target_dim) target = torch.cat( - (past_target_cdf[:, -self.context_length :, ...], future_target_cdf), dim=1, + (past_target_cdf[:, -self.context_length :, ...], future_target_cdf), + dim=1, ) # assert_shape(target, (-1, seq_len, self.target_dim)) @@ -519,7 +522,12 @@ class TempFlowPredictionNetwork(TempFlowTrainingNetwork): # (batch_size, num_samples, prediction_length, target_dim) return samples.reshape( - (-1, self.num_parallel_samples, self.prediction_length, self.target_dim,) + ( + -1, + self.num_parallel_samples, + self.prediction_length, + self.target_dim, + ) ) def forward( diff --git a/pts/model/transformer/__init__.py b/pts/model/transformer/__init__.py index e0302dd..7ba47bf 100644 --- a/pts/model/transformer/__init__.py +++ b/pts/model/transformer/__init__.py @@ -1 +1 @@ -from .transformer_estimator import TransformerEstimator \ No newline at end of file +from .transformer_estimator import TransformerEstimator diff --git a/pts/model/transformer/transformer_estimator.py b/pts/model/transformer/transformer_estimator.py index da285bb..d7d2cb3 100644 --- a/pts/model/transformer/transformer_estimator.py +++ b/pts/model/transformer/transformer_estimator.py @@ -11,7 +11,7 @@ from pts.feature import ( fourier_time_features_from_frequency_str, get_fourier_lags_for_frequency, ) -from pts.model import PTSEstimator, Predictor, PTSPredictor, copy_parameters +from pts.model import PyTorchEstimator, Predictor, PyTorchPredictor, copy_parameters from pts.modules import DistributionOutput, StudentTOutput from pts.transform import ( Transformation, @@ -32,7 +32,7 @@ from .transformer_network import ( ) -class TransformerEstimator(PTSEstimator): +class TransformerEstimator(PyTorchEstimator): def __init__( self, input_size: int, @@ -75,7 +75,9 @@ class TransformerEstimator(PTSEstimator): self.embedding_dimension = embedding_dimension self.num_parallel_samples = num_parallel_samples 
self.lags_seq = ( - lags_seq if lags_seq is not None else get_fourier_lags_for_frequency(freq_str=freq) + lags_seq + if lags_seq is not None + else get_fourier_lags_for_frequency(freq_str=freq) ) self.time_features = ( time_features @@ -117,7 +119,9 @@ class TransformerEstimator(PTSEstimator): field=FieldName.FEAT_STATIC_CAT, expected_ndim=1, dtype=np.long ), AsNumpyArray( - field=FieldName.FEAT_STATIC_REAL, expected_ndim=1, dtype=self.dtype, + field=FieldName.FEAT_STATIC_REAL, + expected_ndim=1, + dtype=self.dtype, ), AsNumpyArray( field=FieldName.TARGET, @@ -220,7 +224,7 @@ class TransformerEstimator(PTSEstimator): copy_parameters(trained_network, prediction_network) - return PTSPredictor( + return PyTorchPredictor( input_transform=transformation, prediction_net=prediction_network, batch_size=self.trainer.batch_size, diff --git a/pts/model/transformer/transformer_network.py b/pts/model/transformer/transformer_network.py index 38e5ca5..6c1af01 100644 --- a/pts/model/transformer/transformer_network.py +++ b/pts/model/transformer/transformer_network.py @@ -3,7 +3,7 @@ from typing import List, Optional, Tuple import torch import torch.nn as nn -from pts.core.component import validated +from gluonts.core.component import validated from pts.modules import DistributionOutput, MeanScaler, NOPScaler, FeatureEmbedder @@ -15,7 +15,6 @@ def prod(xs): class TransformerNetwork(nn.Module): - @validated() def __init__( self, @@ -72,7 +71,8 @@ class TransformerNetwork(nn.Module): self.proj_dist_args = distr_output.get_args_proj(d_model) self.embedder = FeatureEmbedder( - cardinalities=cardinality, embedding_dims=embedding_dimension, + cardinalities=cardinality, + embedding_dims=embedding_dimension, ) if scaling: @@ -82,7 +82,8 @@ class TransformerNetwork(nn.Module): # mask self.register_buffer( - "tgt_mask", self.transformer.generate_square_subsequent_mask(prediction_length) + "tgt_mask", + self.transformer.generate_square_subsequent_mask(prediction_length), ) @staticmethod @@ 
-154,9 +155,7 @@ class TransformerNetwork(nn.Module): else: time_feat = torch.cat( ( - past_time_feat[ - :, self.history_length - self.context_length :, ... - ], + past_time_feat[:, self.history_length - self.context_length :, ...], future_time_feat, ), dim=1, @@ -177,7 +176,7 @@ class TransformerNetwork(nn.Module): # scale shape is (batch_size, 1, *target_shape) _, scale = self.scaler( past_target[:, -self.context_length :, ...], - past_observed_values[:, -self.context_length :, ...] + past_observed_values[:, -self.context_length :, ...], ) embedded_cat = self.embedder(feat_static_cat) diff --git a/pts/model/transformer_tempflow/transformer_tempflow_estimator.py b/pts/model/transformer_tempflow/transformer_tempflow_estimator.py index 3f93183..d489d35 100644 --- a/pts/model/transformer_tempflow/transformer_tempflow_estimator.py +++ b/pts/model/transformer_tempflow/transformer_tempflow_estimator.py @@ -9,7 +9,7 @@ from pts.feature import ( fourier_time_features_from_frequency_str, get_fourier_lags_for_frequency, ) -from pts.model import PTSEstimator, PTSPredictor, copy_parameters +from pts.model import PyTorchEstimator, PyTorchPredictor, copy_parameters from pts.transform import ( Transformation, Chain, @@ -24,10 +24,13 @@ from pts.transform import ( SetFieldIfNotPresent, TargetDimIndicator, ) -from .transformer_tempflow_network import TransformerTempFlowTrainingNetwork, TransformerTempFlowPredictionNetwork +from .transformer_tempflow_network import ( + TransformerTempFlowTrainingNetwork, + TransformerTempFlowPredictionNetwork, +) -class TransformerTempFlowEstimator(PTSEstimator): +class TransformerTempFlowEstimator(PyTorchEstimator): def __init__( self, input_size: int, @@ -52,7 +55,6 @@ class TransformerTempFlowEstimator(PTSEstimator): n_hidden=2, conditioning_length: int = 200, dequantize: bool = False, - scaling: bool = True, pick_incomplete: bool = False, lags_seq: Optional[List[int]] = None, @@ -108,10 +110,16 @@ class 
TransformerTempFlowEstimator(PTSEstimator): def create_transformation(self) -> Transformation: return Chain( [ - AsNumpyArray(field=FieldName.TARGET, expected_ndim=2,), + AsNumpyArray( + field=FieldName.TARGET, + expected_ndim=2, + ), # maps the target to (1, T) # if the target data is uni dimensional - ExpandDimArray(field=FieldName.TARGET, axis=None,), + ExpandDimArray( + field=FieldName.TARGET, + axis=None, + ), AddObservedValuesIndicator( target_field=FieldName.TARGET, output_field=FieldName.OBSERVED_VALUES, @@ -156,7 +164,9 @@ class TransformerTempFlowEstimator(PTSEstimator): ] ) - def create_training_network(self, device: torch.device) -> TransformerTempFlowTrainingNetwork: + def create_training_network( + self, device: torch.device + ) -> TransformerTempFlowTrainingNetwork: return TransformerTempFlowTrainingNetwork( input_size=self.input_size, target_dim=self.target_dim, @@ -187,7 +197,7 @@ class TransformerTempFlowEstimator(PTSEstimator): transformation: Transformation, trained_network: TransformerTempFlowTrainingNetwork, device: torch.device, - ) -> PTSPredictor: + ) -> PyTorchPredictor: prediction_network = TransformerTempFlowPredictionNetwork( input_size=self.input_size, target_dim=self.target_dim, @@ -216,7 +226,7 @@ class TransformerTempFlowEstimator(PTSEstimator): copy_parameters(trained_network, prediction_network) - return PTSPredictor( + return PyTorchPredictor( input_transform=transformation, prediction_net=prediction_network, batch_size=self.trainer.batch_size, diff --git a/pts/model/transformer_tempflow/transformer_tempflow_network.py b/pts/model/transformer_tempflow/transformer_tempflow_network.py index 3a064e0..69e5768 100644 --- a/pts/model/transformer_tempflow/transformer_tempflow_network.py +++ b/pts/model/transformer_tempflow/transformer_tempflow_network.py @@ -3,12 +3,11 @@ from typing import List, Optional, Tuple import torch import torch.nn as nn -from pts.core.component import validated +from gluonts.core.component import validated 
from pts.modules import RealNVP, MAF, FlowOutput, MeanScaler, NOPScaler class TransformerTempFlowTrainingNetwork(nn.Module): - @validated() def __init__( self, @@ -61,7 +60,10 @@ class TransformerTempFlowTrainingNetwork(nn.Module): activation=act_type, ) - flow_cls = {"RealNVP": RealNVP, "MAF": MAF,}[flow_type] + flow_cls = { + "RealNVP": RealNVP, + "MAF": MAF, + }[flow_type] self.flow = flow_cls( input_size=target_dim, n_blocks=n_blocks, @@ -146,9 +148,7 @@ class TransformerTempFlowTrainingNetwork(nn.Module): future_time_feat: Optional[torch.Tensor], future_target_cdf: Optional[torch.Tensor], target_dimension_indicator: torch.Tensor, - ) -> Tuple[ - torch.Tensor, torch.Tensor, torch.Tensor, - ]: + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor,]: """ Unrolls the RNN encoder over past and, if present, future data. Returns outputs and state of the encoder, plus the scale of @@ -204,7 +204,10 @@ class TransformerTempFlowTrainingNetwork(nn.Module): subsequences_length = self.context_length else: time_feat = torch.cat( - (past_time_feat[:, -self.context_length :, ...], future_time_feat,), + ( + past_time_feat[:, -self.context_length :, ...], + future_time_feat, + ), dim=1, ) sequence = torch.cat((past_target_cdf, future_target_cdf), dim=1) @@ -516,7 +519,12 @@ class TransformerTempFlowPredictionNetwork(TransformerTempFlowTrainingNetwork): # (batch_size, num_samples, prediction_length, target_dim) return samples.reshape( - (-1, self.num_parallel_samples, self.prediction_length, self.target_dim,) + ( + -1, + self.num_parallel_samples, + self.prediction_length, + self.target_dim, + ) ) def forward( diff --git a/pts/model/utils.py b/pts/model/utils.py index 356bd08..209ae1b 100644 --- a/pts/model/utils.py +++ b/pts/model/utils.py @@ -7,30 +7,35 @@ import torch.nn as nn def get_module_forward_input_names(module: nn.Module): params = inspect.signature(module.forward).parameters - return list(params) - - -def copy_parameters(net_source: nn.Module, net_dest: nn.Module) -> 
None: - net_dest.load_state_dict(net_source.state_dict()) + param_names = [k for k, v in params.items() if not str(v).startswith("*")] + return param_names def weighted_average( - tensor: torch.Tensor, weights: Optional[torch.Tensor] = None, dim=None -): + x: torch.Tensor, weights: Optional[torch.Tensor] = None, dim=None +) -> torch.Tensor: + """ + Computes the weighted average of a given tensor across a given dim, masking + values associated with weight zero, + meaning instead of `nan * 0 = nan` you will get `0 * 0 = 0`. + + Parameters + ---------- + x + Input tensor, of which the average must be computed. + weights + Weights tensor, of the same shape as `x`. + dim + The dim along which to average `x` + + Returns + ------- + Tensor: + The tensor with values averaged along the specified `dim`. + """ if weights is not None: - weighted_tensor = tensor * weights - if dim is not None: - sum_weights = torch.sum(weights, dim) - sum_weighted_tensor = torch.sum(weighted_tensor, dim) - else: - sum_weights = weights.sum() - sum_weighted_tensor = weighted_tensor.sum() - - sum_weights = torch.max(torch.ones_like(sum_weights), sum_weights) - - return sum_weighted_tensor / sum_weights + weighted_tensor = torch.where(weights != 0, x * weights, torch.zeros_like(x)) + sum_weights = torch.clamp(weights.sum(dim=dim) if dim else weights.sum(), min=1.0) + return (weighted_tensor.sum(dim=dim) if dim else weighted_tensor.sum())/ sum_weights else: - if dim is not None: - return torch.mean(tensor, dim=dim) - else: - return tensor.mean() + return x.mean(dim=dim) diff --git a/pts/modules/__init__.py b/pts/modules/__init__.py index ab4a4f2..601f1ab 100644 --- a/pts/modules/__init__.py +++ b/pts/modules/__init__.py @@ -1,7 +1,4 @@ from .distribution_output import ( - ArgProj, - Output, - DistributionOutput, NormalOutput, StudentTOutput, BetaOutput, @@ -20,5 +17,4 @@ from .distribution_output import ( ) from .feature import FeatureEmbedder, FeatureAssembler from .flows import RealNVP, MAF -from 
.lambda_layer import LambdaLayer from .scaler import MeanScaler, NOPScaler diff --git a/pts/modules/distribution_output.py b/pts/modules/distribution_output.py index 6aff1e1..49431fb 100644 --- a/pts/modules/distribution_output.py +++ b/pts/modules/distribution_output.py @@ -19,7 +19,8 @@ from torch.distributions import ( MultivariateNormal, TransformedDistribution, AffineTransform, - Poisson) + Poisson, +) from pts.distributions import ( ZeroInflatedPoisson, @@ -29,79 +30,13 @@ from pts.distributions import ( ImplicitQuantile, TransformedImplicitQuantile, ) -from pts.core.component import validated +from gluonts.core.component import validated +from gluonts.torch.modules.distribution_output import ( + DistributionOutput, + LambdaLayer, + PtArgProj, +) from pts.modules.iqn_modules import ImplicitQuantileModule -from .lambda_layer import LambdaLayer - - -class ArgProj(nn.Module): - def __init__( - self, - in_features: int, - args_dim: Dict[str, int], - domain_map: Callable[..., Tuple[torch.Tensor]], - dtype: np.dtype = np.float32, - prefix: Optional[str] = None, - **kwargs, - ): - super().__init__(**kwargs) - self.args_dim = args_dim - self.dtype = dtype - self.proj = nn.ModuleList( - [nn.Linear(in_features, dim) for dim in args_dim.values()] - ) - self.domain_map = domain_map - - def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor]: - params_unbounded = [proj(x) for proj in self.proj] - - return self.domain_map(*params_unbounded) - - -class Output(ABC): - in_features: int - args_dim: Dict[str, int] - _dtype: np.dtype = np.float32 - - @property - def dtype(self): - return self._dtype - - @dtype.setter - def dtype(self, dtype: np.dtype): - self._dtype = dtype - - def get_args_proj(self, in_features: int, prefix: Optional[str] = None) -> ArgProj: - return ArgProj( - in_features=in_features, - args_dim=self.args_dim, - domain_map=LambdaLayer(self.domain_map), - prefix=prefix, - dtype=self.dtype, - ) - - @abstractclassmethod - def domain_map(cls, *args: 
torch.Tensor): - pass - - -class DistributionOutput(Output, ABC): - - distr_cls: type - - @validated() - def __init__(self) -> None: - pass - - def distribution( - self, distr_args, scale: Optional[torch.Tensor] = None - ) -> Distribution: - - distr = self.distr_cls(*distr_args) - if scale is None: - return distr - else: - return TransformedDistribution(distr, [AffineTransform(loc=0, scale=scale)]) class IndependentDistributionOutput(DistributionOutput): @@ -364,7 +299,9 @@ class PiecewiseLinearOutput(DistributionOutput): return gamma.squeeze(axis=-1), slopes_proj, knot_spacings_proj def distribution( - self, distr_args, scale: Optional[torch.Tensor] = None, + self, + distr_args, + scale: Optional[torch.Tensor] = None, ) -> PiecewiseLinear: if scale is None: return self.distr_cls(*distr_args) @@ -415,7 +352,11 @@ class NormalMixtureOutput(DistributionOutput): class LowRankMultivariateNormalOutput(DistributionOutput): @validated() def __init__( - self, dim: int, rank: int, sigma_init: float = 1.0, sigma_minimum: float = 1e-3, + self, + dim: int, + rank: int, + sigma_init: float = 1.0, + sigma_minimum: float = 1e-3, ) -> None: self.distr_cls = LowRankMultivariateNormal self.dim = dim @@ -508,25 +449,16 @@ class FlowOutput(DistributionOutput): return (self.dim,) -class QuantileArgProj(ArgProj): +class QuantilePtArgProj(PtArgProj): def __init__( - self, - in_features: int, - output_domain_cls: nn.Module, - args_dim: Dict[str, int], - domain_map: Callable[..., Tuple[torch.Tensor]], - dtype: np.dtype = np.float32, - prefix: Optional[str] = None, - **kwargs, + self, + in_features: int, + output_domain_cls: nn.Module, + args_dim: Dict[str, int], + domain_map: Callable[..., Tuple[torch.Tensor]], + **kwargs, ): - super().__init__( - in_features, - args_dim, - domain_map, - dtype, - prefix, - **kwargs - ) + super().__init__(in_features, args_dim, domain_map, **kwargs) self.output_domain_cls = output_domain_cls self.proj = ImplicitQuantileModule(in_features, output_domain_cls) 
@@ -535,8 +467,8 @@ class QuantileArgProj(ArgProj): forecast_length = x.shape[1] device = x.device taus = torch.rand(size=(batch_size, forecast_length), device=device) - self.register_buffer('taus', taus) - self.register_buffer('nn_ouput', x.clone().detach()) + self.register_buffer("taus", taus) + self.register_buffer("nn_ouput", x.clone().detach()) predicted_quantiles = self.proj(x, taus) return self.domain_map(predicted_quantiles) @@ -548,6 +480,7 @@ class ImplicitQuantileOutput(IndependentDistributionOutput): output_domain_cls: type = nn.Module quantile_arg_proj: type = nn.Module + @validated() def __init__(self, output_domain: str) -> None: super().__init__() self.set_output_domain_map(output_domain) @@ -559,14 +492,17 @@ class ImplicitQuantileOutput(IndependentDistributionOutput): "Positive": nn.Softplus, "Real": nn.Identity, } - assert output_domain in available_domain_map_cls.keys(), \ - "Only the following output domains are allowed: {}".format(available_domain_map_cls.keys()) + assert ( + output_domain in available_domain_map_cls.keys() + ), "Only the following output domains are allowed: {}".format( + available_domain_map_cls.keys() + ) output_domain_cls = available_domain_map_cls[output_domain] cls.output_domain_cls = output_domain_cls @classmethod def set_args_proj(cls): - cls.quantile_arg_proj = QuantileArgProj( + cls.quantile_arg_proj = QuantilePtArgProj( in_features=cls.in_features, output_domain_cls=cls.output_domain_cls, args_dim=cls.args_dim, @@ -584,11 +520,13 @@ class ImplicitQuantileOutput(IndependentDistributionOutput): cls.set_args_proj() return cls.quantile_arg_proj - def get_args_proj(self, in_features: int, prefix: Optional[str] = None) : + def get_args_proj(self, in_features: int, prefix: Optional[str] = None): return self.args_proj(in_features) def distribution( - self, distr_args, scale: Optional[torch.Tensor] = None, + self, + distr_args, + scale: Optional[torch.Tensor] = None, ) -> ImplicitQuantile: args_proj = 
self.get_args_proj(self.in_features) @@ -597,7 +535,8 @@ class ImplicitQuantileOutput(IndependentDistributionOutput): implicit_quantile_function=implicit_quantile_function, taus=list(args_proj.buffers())[0], nn_output=list(args_proj.buffers())[1], - predicted_quantiles=distr_args) + predicted_quantiles=distr_args, + ) if scale is None: return distr else: @@ -608,6 +547,3 @@ class ImplicitQuantileOutput(IndependentDistributionOutput): @property def event_shape(self) -> Tuple: return () - - - diff --git a/pts/modules/feature.py b/pts/modules/feature.py index db794e2..0ae865e 100644 --- a/pts/modules/feature.py +++ b/pts/modules/feature.py @@ -5,7 +5,11 @@ import torch.nn as nn class FeatureEmbedder(nn.Module): - def __init__(self, cardinalities: List[int], embedding_dims: List[int],) -> None: + def __init__( + self, + cardinalities: List[int], + embedding_dims: List[int], + ) -> None: super().__init__() self.__num_features = len(cardinalities) diff --git a/pts/modules/iqn_modules.py b/pts/modules/iqn_modules.py index bd6a2ed..0fe8d37 100644 --- a/pts/modules/iqn_modules.py +++ b/pts/modules/iqn_modules.py @@ -52,6 +52,6 @@ class QuantileLayer(nn.Module): integers = torch.repeat_interleave( torch.arange(0, self.n_cos_embedding).unsqueeze(dim=0), repeats=tau.shape[-1], - dim=0 + dim=0, ).to(tau.device) return torch.cos(pi * tau.unsqueeze(dim=-1) * integers) diff --git a/pts/modules/lambda_layer.py b/pts/modules/lambda_layer.py deleted file mode 100644 index 732bafc..0000000 --- a/pts/modules/lambda_layer.py +++ /dev/null @@ -1,10 +0,0 @@ -import torch.nn as nn - - -class LambdaLayer(nn.Module): - def __init__(self, function): - super().__init__() - self._func = function - - def forward(self, x, *args): - return self._func(x, *args) diff --git a/pts/modules/scaler.py b/pts/modules/scaler.py index 1972446..7a80e3a 100644 --- a/pts/modules/scaler.py +++ b/pts/modules/scaler.py @@ -37,7 +37,7 @@ class Scaler(ABC, nn.Module): Tensor Tensor containing the "scaled" data, 
shape: (N, T, C) or (N, C, T). Tensor - Tensor containing the scale, of shape (N, C) if ``keepdim == False``, + Tensor containing the scale, of shape (N, C) if ``keepdim == False``, and shape (N, 1, C) or (N, C, 1) if ``keepdim == True``. """ diff --git a/pts/trainer.py b/pts/trainer.py index 8c61fc3..af9a166 100644 --- a/pts/trainer.py +++ b/pts/trainer.py @@ -1,14 +1,17 @@ import time -from typing import List, Optional +from typing import List, Optional, Union + +from tqdm import tqdm import torch import torch.nn as nn -from torch.utils.data import DataLoader -from torch.utils.tensorboard import SummaryWriter -from tqdm import tqdm + +from gluonts.core.component import validated +from gluonts.dataset.loader import TrainDataLoader, ValidationDataLoader class Trainer: + @validated() def __init__( self, epochs: int = 100, @@ -18,7 +21,7 @@ class Trainer: pin_memory: bool = False, learning_rate: float = 1e-3, weight_decay: float = 1e-6, - device: Optional[torch.device] = None, + device: Optional[Union[torch.device, str]] = None, ) -> None: self.epochs = epochs self.batch_size = batch_size @@ -30,26 +33,26 @@ class Trainer: self.pin_memory = pin_memory def __call__( - self, net: nn.Module, input_names: List[str], data_loader: DataLoader + self, + net: nn.Module, + train_iter: TrainDataLoader, + validation_iter: Optional[ValidationDataLoader] = None, ) -> None: optimizer = torch.optim.Adam( net.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay ) - writer = SummaryWriter() - #writer.add_graph(net) - for epoch_no in range(self.epochs): # mark epoch start time tic = time.time() avg_epoch_loss = 0.0 - with tqdm(data_loader) as it: + with tqdm(train_iter) as it: for batch_no, data_entry in enumerate(it, start=1): optimizer.zero_grad() - inputs = [data_entry[k].to(self.device) for k in input_names] + #inputs = [data_entry[k].to(self.device) for k in input_names] - output = net(*inputs) + output = net(*data_entry.values()) if isinstance(output, (list, 
tuple)): loss = output[0] else: @@ -63,18 +66,20 @@ class Trainer: }, refresh=False, ) - n_iter = epoch_no*self.num_batches_per_epoch + batch_no - writer.add_scalar('Loss/train', loss.item(), n_iter) + n_iter = epoch_no * self.num_batches_per_epoch + batch_no + #.add_scalar("Loss/train", loss.item(), n_iter) loss.backward() optimizer.step() if self.num_batches_per_epoch == batch_no: - for name, param in net.named_parameters(): - writer.add_histogram(name, param.clone().cpu().data.numpy(), n_iter) + # for name, param in net.named_parameters(): + # writer.add_histogram( + # name, param.clone().cpu().data.numpy(), n_iter + # ) break # mark epoch end time and log time cost of current epoch toc = time.time() - - writer.close() + + #writer.close() diff --git a/pts/transform/__init__.py b/pts/transform/__init__.py deleted file mode 100644 index 12cc028..0000000 --- a/pts/transform/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -from .convert import ( - AsNumpyArray, - ExpandDimArray, - VstackFeatures, - ConcatFeatures, - SwapAxes, - ListFeatures, - TargetDimIndicator, - SampleTargetDim, - CDFtoGaussianTransform, - cdf_to_gaussian_forward_transform, -) -from .dataset import TransformedDataset -from .feature import ( - target_transformation_length, - AddObservedValuesIndicator, - AddConstFeature, - AddTimeFeatures, - AddAgeFeature, -) -from .field import ( - RemoveFields, - RenameFields, - SetField, - SetFieldIfNotPresent, - SelectFields, -) -from .sampler import ( - InstanceSampler, - UniformSplitSampler, - TestSplitSampler, - ExpectedNumInstanceSampler, - BucketInstanceSampler, - ContinuousTimePointSampler, - ContinuousTimeUniformSampler, -) -from .split import ( - shift_timestamp, - InstanceSplitter, - CanonicalInstanceSplitter, - ContinuousTimeInstanceSplitter, -) -from .transform import ( - Transformation, - Chain, - Identity, - MapTransformation, - SimpleTransformation, - AdhocTransform, - FlatMapTransformation, - FilterTransformation, -) diff --git 
a/pts/transform/convert.py b/pts/transform/convert.py deleted file mode 100644 index 253b5dd..0000000 --- a/pts/transform/convert.py +++ /dev/null @@ -1,713 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -from typing import Iterator, List, Tuple, Optional - -import numpy as np -import torch -from scipy.special import erf, erfinv - -from pts.core.component import validated -from pts.dataset import DataEntry -from pts.exception import assert_pts -from .transform import ( - SimpleTransformation, - MapTransformation, - FlatMapTransformation, -) - - -class AsNumpyArray(SimpleTransformation): - """ - Converts the value of a field into a numpy array. - - Parameters - ---------- - expected_ndim - Expected number of dimensions. Throws an exception if the number of - dimensions does not match. - dtype - numpy dtype to use. 
- """ - @validated() - def __init__( - self, field: str, expected_ndim: int, dtype: np.dtype = np.float32 - ) -> None: - self.field = field - self.expected_ndim = expected_ndim - self.dtype = dtype - - def transform(self, data: DataEntry) -> DataEntry: - value = data[self.field] - if not isinstance(value, float): - # this lines produces "ValueError: setting an array element with a - # sequence" on our test - # value = np.asarray(value, dtype=np.float32) - # see https://stackoverflow.com/questions/43863748/ - value = np.asarray(list(value), dtype=self.dtype) - else: - # ugly: required as list conversion will fail in the case of a - # float - value = np.asarray(value, dtype=self.dtype) - assert_pts( - value.ndim >= self.expected_ndim, - 'Input for field "{self.field}" does not have the required' - "dimension (field: {self.field}, ndim observed: {value.ndim}, " - "expected ndim: {self.expected_ndim})", - value=value, - self=self, - ) - data[self.field] = value - return data - - -class ExpandDimArray(SimpleTransformation): - """ - Expand dims in the axis specified, if the axis is not present does nothing. - (This essentially calls np.expand_dims) - - Parameters - ---------- - field - Field in dictionary to use - axis - Axis to expand (see np.expand_dims for details) - """ - @validated() - def __init__(self, field: str, axis: Optional[int] = None) -> None: - self.field = field - self.axis = axis - - def transform(self, data: DataEntry) -> DataEntry: - if self.axis is not None: - data[self.field] = np.expand_dims(data[self.field], axis=self.axis) - return data - - -class VstackFeatures(SimpleTransformation): - """ - Stack fields together using ``np.vstack``. - - Fields with value ``None`` are ignored. - - Parameters - ---------- - output_field - Field name to use for the output - input_fields - Fields to stack together - drop_inputs - If set to true the input fields will be dropped. 
- """ - @validated() - def __init__( - self, output_field: str, input_fields: List[str], drop_inputs: bool = True, - ) -> None: - self.output_field = output_field - self.input_fields = input_fields - self.cols_to_drop = ( - [] - if not drop_inputs - else [fname for fname in self.input_fields if fname != output_field] - ) - - def transform(self, data: DataEntry) -> DataEntry: - r = [data[fname] for fname in self.input_fields if data[fname] is not None] - output = np.vstack(r) - data[self.output_field] = output - for fname in self.cols_to_drop: - del data[fname] - return data - - -class ConcatFeatures(SimpleTransformation): - """ - Concatenate fields together using ``np.concatenate``. - - Fields with value ``None`` are ignored. - - Parameters - ---------- - output_field - Field name to use for the output - input_fields - Fields to stack together - drop_inputs - If set to true the input fields will be dropped. - """ - @validated() - def __init__( - self, output_field: str, input_fields: List[str], drop_inputs: bool = True, - ) -> None: - self.output_field = output_field - self.input_fields = input_fields - self.cols_to_drop = ( - [] - if not drop_inputs - else [fname for fname in self.input_fields if fname != output_field] - ) - - def transform(self, data: DataEntry) -> DataEntry: - r = [data[fname] for fname in self.input_fields if data[fname] is not None] - output = np.concatenate(r) - data[self.output_field] = output - for fname in self.cols_to_drop: - del data[fname] - return data - - -class SwapAxes(SimpleTransformation): - """ - Apply `np.swapaxes` to fields. 
- - Parameters - ---------- - input_fields - Field to apply to - axes - Axes to use - """ - @validated() - def __init__(self, input_fields: List[str], axes: Tuple[int, int]) -> None: - self.input_fields = input_fields - self.axis1, self.axis2 = axes - - def transform(self, data: DataEntry) -> DataEntry: - for field in self.input_fields: - data[field] = self.swap(data[field]) - return data - - def swap(self, v): - if isinstance(v, np.ndarray): - return np.swapaxes(v, self.axis1, self.axis2) - if isinstance(v, list): - return [self.swap(x) for x in v] - else: - raise ValueError( - f"Unexpected field type {type(v).__name__}, expected " - f"np.ndarray or list[np.ndarray]" - ) - - -class ListFeatures(SimpleTransformation): - """ - Creates a new field which contains a list of features. - - Parameters - ---------- - output_field - Field name for output - input_fields - Fields to combine into list - drop_inputs - If true the input fields will be removed from the result. - """ - @validated() - def __init__( - self, output_field: str, input_fields: List[str], drop_inputs: bool = True, - ) -> None: - self.output_field = output_field - self.input_fields = input_fields - self.cols_to_drop = ( - [] - if not drop_inputs - else [fname for fname in self.input_fields if fname != output_field] - ) - - def transform(self, data: DataEntry) -> DataEntry: - data[self.output_field] = [data[fname] for fname in self.input_fields] - for fname in self.cols_to_drop: - del data[fname] - return data - - -class TargetDimIndicator(SimpleTransformation): - """ - Label-encoding of the target dimensions. 
- """ - @validated() - def __init__(self, field_name: str, target_field: str) -> None: - self.field_name = field_name - self.target_field = target_field - - def transform(self, data: DataEntry) -> DataEntry: - data[self.field_name] = np.arange(0, data[self.target_field].shape[0]) - return data - - -class SampleTargetDim(FlatMapTransformation): - """ - Samples random dimensions from the target at training time. - """ - @validated() - def __init__( - self, - field_name: str, - target_field: str, - observed_values_field: str, - num_samples: int, - shuffle: bool = True, - ) -> None: - self.field_name = field_name - self.target_field = target_field - self.observed_values_field = observed_values_field - self.num_samples = num_samples - self.shuffle = shuffle - - def flatmap_transform( - self, data: DataEntry, is_train: bool, slice_future_target: bool = True - ) -> Iterator[DataEntry]: - if not is_train: - yield data - else: - # (target_dim,) - target_dimensions = data[self.field_name] - - if self.shuffle: - np.random.shuffle(target_dimensions) - - target_dimensions = target_dimensions[: self.num_samples] - - data[self.field_name] = target_dimensions - # (seq_len, target_dim) -> (seq_len, num_samples) - - for field in [ - f"past_{self.target_field}", - f"future_{self.target_field}", - f"past_{self.observed_values_field}", - f"future_{self.observed_values_field}", - ]: - data[field] = data[field][:, target_dimensions] - - yield data - - -class CDFtoGaussianTransform(MapTransformation): - """ - Marginal transformation that transforms the target via an empirical CDF - to a standard gaussian as described here: https://arxiv.org/abs/1910.03002 - - To be used in conjunction with a multivariate gaussian to from a copula. - Note that this transformation is currently intended for multivariate - targets only. 
- """ - @validated() - def __init__( - self, - target_dim: int, - target_field: str, - observed_values_field: str, - cdf_suffix="_cdf", - max_context_length: Optional[int] = None, - ) -> None: - """ - Constructor for CDFtoGaussianTransform. - - Parameters - ---------- - target_dim - Dimensionality of the target. - target_field - Field that will be transformed. - observed_values_field - Field that indicates observed values. - cdf_suffix - Suffix to mark the field with the transformed target. - max_context_length - Sets the maximum context length for the empirical CDF. - """ - self.target_field = target_field - self.past_target_field = "past_" + self.target_field - self.future_target_field = "future_" + self.target_field - self.past_observed_field = f"past_{observed_values_field}" - self.sort_target_field = f"past_{target_field}_sorted" - self.slopes_field = "slopes" - self.intercepts_field = "intercepts" - self.cdf_suffix = cdf_suffix - self.max_context_length = max_context_length - self.target_dim = target_dim - - def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry: - self._preprocess_data(data, is_train=is_train) - self._calc_pw_linear_params(data) - - for target_field in [self.past_target_field, self.future_target_field]: - data[target_field + self.cdf_suffix] = self.standard_gaussian_ppf( - self._empirical_cdf_forward_transform( - data[self.sort_target_field], - data[target_field], - data[self.slopes_field], - data[self.intercepts_field], - ) - ) - return data - - def _preprocess_data(self, data: DataEntry, is_train: bool): - """ - Performs several preprocess operations for computing the empirical CDF. - 1) Reshaping the data. - 2) Normalizing the target length. - 3) Adding noise to avoid zero slopes (training only) - 4) Sorting the target to compute the empirical CDF - - Parameters - ---------- - data - DataEntry with input data. 
- is_train - if is_train is True, this function adds noise to the target to - avoid zero slopes in the piece-wise linear function. - Returns - ------- - - """ - # (target_length, target_dim) - past_target_vec = data[self.past_target_field].copy() - - # pick only observed values - target_length, target_dim = past_target_vec.shape - - # (target_length, target_dim) - past_observed = (data[self.past_observed_field] > 0) * ( - data["past_is_pad"].reshape((-1, 1)) == 0 - ) - assert past_observed.ndim == 2 - assert target_dim == self.target_dim - - past_target_vec = past_target_vec[past_observed.min(axis=1)] - - assert past_target_vec.ndim == 2 - assert past_target_vec.shape[1] == self.target_dim - - expected_length = ( - target_length - if self.max_context_length is None - else self.max_context_length - ) - - if target_length != expected_length: - # Fills values in the case where past_target_vec.shape[-1] < - # target_length - # as dataset.loader.BatchBuffer does not support varying shapes - past_target_vec = CDFtoGaussianTransform._fill( - past_target_vec, expected_length - ) - - # sorts along the time dimension to compute empirical CDF of each - # dimension - if is_train: - past_target_vec = self._add_noise(past_target_vec) - - past_target_vec.sort(axis=0) - - assert past_target_vec.shape == (expected_length, self.target_dim) - - data[self.sort_target_field] = past_target_vec - - def _calc_pw_linear_params(self, data: DataEntry): - """ - Calculates the piece-wise linear parameters to interpolate between - the observed values in the empirical CDF. - - Once current limitation is that we use a zero slope line as the last - piece. Thus, we cannot forecast anything higher than the highest - observed value. - - Parameters - ---------- - data - Input data entry containing a sorted target field. 
- - Returns - ------- - - """ - sorted_target = data[self.sort_target_field] - sorted_target_length, target_dim = sorted_target.shape - - quantiles = np.stack( - [np.arange(sorted_target_length) for _ in range(target_dim)], axis=1, - ) / float(sorted_target_length) - - x_diff = np.diff(sorted_target, axis=0) - y_diff = np.diff(quantiles, axis=0) - - # Calculate slopes of the pw-linear pieces. - slopes = np.where(x_diff == 0.0, np.zeros_like(x_diff), y_diff / x_diff) - - zeroes = np.zeros_like(np.expand_dims(slopes[0, :], axis=0)) - slopes = np.append(slopes, zeroes, axis=0) - - # Calculate intercepts of the pw-linear pieces. - intercepts = quantiles - slopes * sorted_target - - # Populate new fields with the piece-wise linear parameters. - data[self.slopes_field] = slopes - data[self.intercepts_field] = intercepts - - def _empirical_cdf_forward_transform( - self, - sorted_values: np.ndarray, - values: np.ndarray, - slopes: np.ndarray, - intercepts: np.ndarray, - ) -> np.ndarray: - """ - Applies the empirical CDF forward transformation. - - Parameters - ---------- - sorted_values - Sorted target vector. - values - Values (real valued) that will be transformed to empirical CDF - values. - slopes - Slopes of the piece-wise linear function. - intercepts - Intercepts of the piece-wise linear function. - - Returns - ------- - quantiles - Empirical CDF quantiles in [0, 1] interval with winzorized cutoff. 
- - """ - m = sorted_values.shape[0] - quantiles = self._forward_transform(sorted_values, values, slopes, intercepts) - - quantiles = np.clip( - quantiles, self.winsorized_cutoff(m), 1 - self.winsorized_cutoff(m) - ) - return quantiles - - @staticmethod - def _add_noise(x: np.array) -> np.array: - scale_noise = 0.2 - std = np.sqrt( - (np.square(x - x.mean(axis=1, keepdims=True))).mean(axis=1, keepdims=True) - ) - noise = np.random.normal( - loc=np.zeros_like(x), scale=np.ones_like(x) * std * scale_noise - ) - x = x + noise - return x - - @staticmethod - def _search_sorted(sorted_vec: np.array, to_insert_vec: np.array) -> np.array: - """ - Finds the indices of the active piece-wise linear function. - - Parameters - ---------- - sorted_vec - Sorted target vector. - to_insert_vec - Vector for which the indicies of the active linear functions - will be computed - - Returns - ------- - indices - Indices mapping to the active linear function. - """ - indices_left = np.searchsorted(sorted_vec, to_insert_vec, side="left") - indices_right = np.searchsorted(sorted_vec, to_insert_vec, side="right") - - indices = indices_left + (indices_right - indices_left) // 2 - indices = indices - 1 - indices = np.minimum(indices, len(sorted_vec) - 1) - indices[indices < 0] = 0 - return indices - - def _forward_transform( - self, - sorted_vec: np.array, - target: np.array, - slopes: np.array, - intercepts: np.array, - ) -> np.array: - """ - Applies the forward transformation to the marginals of the multivariate - target. Target (real valued) -> empirical cdf [0, 1] - - Parameters - ---------- - sorted_vec - Sorted (past) target vector. - target - Target that will be transformed. - slopes - Slopes of the piece-wise linear function. - intercepts - Intercepts of the piece-wise linear function - - Returns - ------- - transformed_target - Transformed target vector. 
- """ - transformed = list() - for sorted, t, slope, intercept in zip( - sorted_vec.transpose(), - target.transpose(), - slopes.transpose(), - intercepts.transpose(), - ): - indices = self._search_sorted(sorted, t) - transformed_value = slope[indices] * t + intercept[indices] - transformed.append(transformed_value) - return np.array(transformed).transpose() - - @staticmethod - def standard_gaussian_cdf(x: np.array) -> np.array: - u = x / (np.sqrt(2.0)) - return (erf(u) + 1.0) / 2.0 - - @staticmethod - def standard_gaussian_ppf(y: np.array) -> np.array: - y_clipped = np.clip(y, a_min=1.0e-6, a_max=1.0 - 1.0e-6) - return np.sqrt(2.0) * erfinv(2.0 * y_clipped - 1.0) - - @staticmethod - def winsorized_cutoff(m: np.array) -> np.array: - """ - Apply truncation to the empirical CDF estimator to reduce variance as - described here: https://arxiv.org/abs/0903.0649 - - Parameters - ---------- - m - Input array with empirical CDF values. - - Returns - ------- - res - Truncated empirical CDf values. - """ - res = 1 / (4 * m ** 0.25 * np.sqrt(3.14 * np.log(m))) - assert 0 < res < 1 - return res - - @staticmethod - def _fill(target: np.ndarray, expected_length: int) -> np.ndarray: - """ - Makes sure target has at least expected_length time-units by repeating - it or using zeros. 
- - Parameters - ---------- - target : shape (seq_len, dim) - expected_length - - Returns - ------- - array of shape (target_length, dim) - """ - - current_length, target_dim = target.shape - if current_length == 0: - # todo handle the case with no observation better, - # we could use dataset statistics but for now we use zeros - filled_target = np.zeros((expected_length, target_dim)) - elif current_length < expected_length: - filled_target = np.vstack( - [target for _ in range(expected_length // current_length + 1)] - ) - filled_target = filled_target[:expected_length] - elif current_length > expected_length: - filled_target = target[-expected_length:] - else: - filled_target = target - - assert filled_target.shape == (expected_length, target_dim) - - return filled_target - - -def cdf_to_gaussian_forward_transform( - input_batch: DataEntry, outputs: torch.Tensor -) -> np.ndarray: - """ - Forward transformation of the CDFtoGaussianTransform. - - Parameters - ---------- - input_batch - Input data to the predictor. - outputs - Predictor outputs. - Returns - ------- - outputs - Forward transformed outputs. - - """ - - def _empirical_cdf_inverse_transform( - batch_target_sorted: torch.Tensor, - batch_predictions: torch.Tensor, - slopes: torch.Tensor, - intercepts: torch.Tensor, - ) -> np.ndarray: - """ - Apply forward transformation of the empirical CDF. - - Parameters - ---------- - batch_target_sorted - Sorted targets of the input batch. - batch_predictions - Predictions of the underlying probability distribution - slopes - Slopes of the piece-wise linear function. - intercepts - Intercepts of the piece-wise linear function. - - Returns - ------- - outputs - Forward transformed outputs. 
- - """ - slopes = slopes.cpu().numpy() - intercepts = intercepts.cpu().numpy() - - batch_target_sorted = batch_target_sorted.cpu().numpy() - _, num_timesteps, _ = batch_target_sorted.shape - indices = np.floor(batch_predictions * num_timesteps) - # indices = indices - 1 - # for now project into [0, 1] - indices = np.clip(indices, 0, num_timesteps - 1) - indices = indices.astype(np.int) - - transformed = np.where( - np.take_along_axis(slopes, indices, axis=1) != 0.0, - (batch_predictions - np.take_along_axis(intercepts, indices, axis=1)) - / np.take_along_axis(slopes, indices, axis=1), - np.take_along_axis(batch_target_sorted, indices, axis=1), - ) - return transformed - - # applies inverse cdf to all outputs - _, samples, _, _ = outputs.shape - for sample_index in range(0, samples): - outputs[:, sample_index, :, :] = _empirical_cdf_inverse_transform( - input_batch["past_target_sorted"], - CDFtoGaussianTransform.standard_gaussian_cdf( - outputs[:, sample_index, :, :] - ), - input_batch["slopes"], - input_batch["intercepts"], - ) - return outputs diff --git a/pts/transform/dataset.py b/pts/transform/dataset.py deleted file mode 100644 index 6b73ead..0000000 --- a/pts/transform/dataset.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- - -from typing import Iterator, List - -from pts.dataset import DataEntry, Dataset -from .transform import Chain, Transformation - - -class TransformedDataset(Dataset): - """ - A dataset that corresponds to applying a list of transformations to each - element in the base_dataset. - This only supports SimpleTransformations, which do the same thing at - prediction and training time. - - - Parameters - ---------- - base_dataset - Dataset to transform - transformations - List of transformations to apply - """ - - def __init__( - self, base_dataset: Dataset, transformations: List[Transformation] - ) -> None: - self.base_dataset = base_dataset - self.transformations = Chain(transformations) - - def __iter__(self) -> Iterator[DataEntry]: - yield from self.transformations(self.base_dataset, is_train=True) - - def __len__(self): - return sum(1 for _ in self) diff --git a/pts/transform/feature.py b/pts/transform/feature.py deleted file mode 100644 index 84062a6..0000000 --- a/pts/transform/feature.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- -from typing import List - -import numpy as np -import pandas as pd - -from pts.core.component import validated -from pts.dataset import DataEntry -from pts.feature import TimeFeature -from .split import shift_timestamp -from .transform import SimpleTransformation, MapTransformation - - -def target_transformation_length( - target: np.array, pred_length: int, is_train: bool -) -> int: - return target.shape[-1] + (0 if is_train else pred_length) - - -class AddObservedValuesIndicator(SimpleTransformation): - """ - Replaces missing values in a numpy array (NaNs) with a dummy value and adds - an "observed"-indicator that is ``1`` when values are observed and ``0`` - when values are missing. - - - Parameters - ---------- - target_field - Field for which missing values will be replaced - output_field - Field name to use for the indicator - dummy_value - Value to use for replacing missing values. - convert_nans - If set to true (default) missing values will be replaced. Otherwise - they will not be replaced. In any case the indicator is included in the - result. 
- """ - @validated() - def __init__( - self, - target_field: str, - output_field: str, - dummy_value: int = 0, - convert_nans: bool = True, - dtype: np.dtype = np.float32, - ) -> None: - self.dummy_value = dummy_value - self.target_field = target_field - self.output_field = output_field - self.convert_nans = convert_nans - self.dtype = dtype - - def transform(self, data: DataEntry) -> DataEntry: - value = data[self.target_field] - nan_indices = np.where(np.isnan(value)) - nan_entries = np.isnan(value) - - if self.convert_nans: - value[nan_indices] = self.dummy_value - - data[self.target_field] = value - # Invert bool array so that missing values are zeros and store as float - data[self.output_field] = np.invert(nan_entries).astype(self.dtype) - return data - - -class AddConstFeature(MapTransformation): - """ - Expands a `const` value along the time axis as a dynamic feature, where - the T-dimension is defined as the sum of the `pred_length` parameter and - the length of a time series specified by the `target_field`. - - If `is_train=True` the feature matrix has the same length as the `target` field. - If `is_train=False` the feature matrix has length len(target) + pred_length - - Parameters - ---------- - output_field - Field name for output. - target_field - Field containing the target array. The length of this array will be used. - pred_length - Prediction length (this is necessary since - features have to be available in the future) - const - Constant value to use. - dtype - Numpy dtype to use for resulting array. 
- """ - @validated() - def __init__( - self, - output_field: str, - target_field: str, - pred_length: int, - const: float = 1.0, - dtype: np.dtype = np.float32, - ) -> None: - self.pred_length = pred_length - self.const = const - self.dtype = dtype - self.output_field = output_field - self.target_field = target_field - - def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry: - length = target_transformation_length( - data[self.target_field], self.pred_length, is_train=is_train - ) - data[self.output_field] = self.const * np.ones( - shape=(1, length), dtype=self.dtype - ) - return data - - -class AddTimeFeatures(MapTransformation): - """ - Adds a set of time features. - - If `is_train=True` the feature matrix has the same length as the `target` field. - If `is_train=False` the feature matrix has length len(target) + pred_length - - Parameters - ---------- - start_field - Field with the start time stamp of the time series - target_field - Field with the array containing the time series values - output_field - Field name for result. - time_features - list of time features to use. 
- pred_length - Prediction length - """ - @validated() - def __init__( - self, - start_field: str, - target_field: str, - output_field: str, - time_features: List[TimeFeature], - pred_length: int, - ) -> None: - self.date_features = time_features - self.pred_length = pred_length - self.start_field = start_field - self.target_field = target_field - self.output_field = output_field - self._min_time_point: pd.Timestamp = None - self._max_time_point: pd.Timestamp = None - self._full_range_date_features: np.ndarray = None - self._date_index: pd.DatetimeIndex = None - - def _update_cache(self, start: pd.Timestamp, length: int) -> None: - end = shift_timestamp(start, length) - if self._min_time_point is not None: - if self._min_time_point <= start and end <= self._max_time_point: - return - if self._min_time_point is None: - self._min_time_point = start - self._max_time_point = end - self._min_time_point = min(shift_timestamp(start, -50), self._min_time_point) - self._max_time_point = max(shift_timestamp(end, 50), self._max_time_point) - self.full_date_range = pd.date_range( - self._min_time_point, self._max_time_point, freq=start.freq - ) - self._full_range_date_features = ( - np.vstack([feat(self.full_date_range) for feat in self.date_features]) - if self.date_features - else None - ) - self._date_index = pd.Series( - index=self.full_date_range, data=np.arange(len(self.full_date_range)), - ) - - def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry: - start = data[self.start_field] - length = target_transformation_length( - data[self.target_field], self.pred_length, is_train=is_train - ) - self._update_cache(start, length) - i0 = self._date_index[start] - features = ( - self._full_range_date_features[..., i0 : i0 + length] - if self.date_features - else None - ) - data[self.output_field] = features - return data - - -class AddAgeFeature(MapTransformation): - """ - Adds an 'age' feature to the data_entry. 
- - The age feature starts with a small value at the start of the time series - and grows over time. - - If `is_train=True` the age feature has the same length as the `target` - field. - If `is_train=False` the age feature has length len(target) + pred_length - - Parameters - ---------- - target_field - Field with target values (array) of time series - output_field - Field name to use for the output. - pred_length - Prediction length - log_scale - If set to true the age feature grows logarithmically otherwise linearly - over time. - """ - @validated() - def __init__( - self, - target_field: str, - output_field: str, - pred_length: int, - log_scale: bool = True, - dtype: np.dtype = np.float32, - ) -> None: - self.pred_length = pred_length - self.target_field = target_field - self.feature_name = output_field - self.log_scale = log_scale - self._age_feature = np.zeros(0) - self.dtype = dtype - - def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry: - length = target_transformation_length( - data[self.target_field], self.pred_length, is_train=is_train - ) - - if self.log_scale: - age = np.log10(2.0 + np.arange(length, dtype=self.dtype)) - else: - age = np.arange(length, dtype=self.dtype) - - data[self.feature_name] = age.reshape((1, length)) - - return data diff --git a/pts/transform/field.py b/pts/transform/field.py deleted file mode 100644 index ccbfa19..0000000 --- a/pts/transform/field.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. 
See the License for the specific language governing -# permissions and limitations under the License. - -from collections import Counter -from typing import Any, Dict, List - -from pts.core.component import validated -from pts.dataset import DataEntry -from .transform import SimpleTransformation, MapTransformation - - -class RenameFields(SimpleTransformation): - """ - Rename fields using a mapping - - Parameters - ---------- - mapping - Name mapping `input_name -> output_name` - """ - @validated() - def __init__(self, mapping: Dict[str, str]) -> None: - self.mapping = mapping - values_count = Counter(mapping.values()) - for new_key, count in values_count.items(): - assert count == 1, f"Mapped key {new_key} occurs multiple time" - - def transform(self, data: DataEntry): - for key, new_key in self.mapping.items(): - if key not in data: - continue - assert new_key not in data - data[new_key] = data[key] - del data[key] - return data - - -class RemoveFields(SimpleTransformation): - - @validated() - def __init__(self, field_names: List[str]) -> None: - self.field_names = field_names - - def transform(self, data: DataEntry) -> DataEntry: - for k in self.field_names: - if k in data.keys(): - del data[k] - return data - - -class SetField(SimpleTransformation): - """ - Sets a field in the dictionary with the given value. - - Parameters - ---------- - output_field - Name of the field that will be set - value - Value to be set - """ - @validated() - def __init__(self, output_field: str, value: Any) -> None: - self.output_field = output_field - self.value = value - - def transform(self, data: DataEntry) -> DataEntry: - data[self.output_field] = self.value - return data - - -class SetFieldIfNotPresent(SimpleTransformation): - """Sets a field in the dictionary with the given value, in case it does not - exist already. 
- - Parameters - ---------- - output_field - Name of the field that will be set - value - Value to be set - """ - @validated() - def __init__(self, field: str, value: Any) -> None: - self.output_field = field - self.value = value - - def transform(self, data: DataEntry) -> DataEntry: - if self.output_field not in data.keys(): - data[self.output_field] = self.value - return data - - -class SelectFields(MapTransformation): - """ - Only keep the listed fields - - Parameters - ---------- - input_fields - List of fields to keep. - """ - @validated() - def __init__(self, input_fields: List[str]) -> None: - self.input_fields = input_fields - - def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry: - return {f: data[f] for f in self.input_fields} diff --git a/pts/transform/sampler.py b/pts/transform/sampler.py deleted file mode 100644 index b61a610..0000000 --- a/pts/transform/sampler.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -from abc import ABC, abstractmethod - -import numpy as np - -from pts.core.component import validated -from pts.dataset.stat import ScaleHistogram - - -class InstanceSampler(ABC): - """ - An InstanceSampler is called with the time series and the valid - index bounds a, b and should return a set of indices a <= i <= b - at which training instances will be generated. 
- - The object should be called with: - - Parameters - ---------- - ts - target that should be sampled with shape (dim, seq_len) - a - first index of the target that can be sampled - b - last index of the target that can be sampled - - Returns - ------- - np.ndarray - Selected points to sample - """ - - @abstractmethod - def __call__(self, ts: np.ndarray, a: int, b: int) -> np.ndarray: - pass - - -class UniformSplitSampler(InstanceSampler): - """ - Samples each point with the same fixed probability. - - Parameters - ---------- - p - Probability of selecting a time point - """ - - def __init__(self, p: float) -> None: - self.p = p - - def __call__(self, ts: np.ndarray, a: int, b: int) -> np.ndarray: - assert a <= b, "First index must be less than or equal to the last index." - - window_size = b - a + 1 - (indices,) = np.where(np.random.random_sample(window_size) < self.p) - return indices + a - - -class TestSplitSampler(InstanceSampler): - """ - Sampler used for prediction. Always selects the last time point for - splitting i.e. the forecast point for the time series. - """ - - def __init__(self) -> None: - pass - - def __call__(self, ts: np.ndarray, a: int, b: int) -> np.ndarray: - return np.array([b]) - - -class ExpectedNumInstanceSampler(InstanceSampler): - """ - Keeps track of the average time series length and adjusts the probability - per time point such that on average `num_instances` training examples are - generated per time series. 
- - Parameters - ---------- - - num_instances - number of training examples generated per time series on average - """ - @validated() - def __init__(self, num_instances: float) -> None: - self.num_instances = num_instances - self.total_length = 0 - self.n = 0 - - def __call__(self, ts: np.ndarray, a: int, b: int) -> np.ndarray: - window_size = b - a + 1 - self.n += 1 - self.total_length += window_size - avg_length = self.total_length / self.n - - sampler = UniformSplitSampler(self.num_instances / avg_length) - return sampler(ts, a, b) - - -class BucketInstanceSampler(InstanceSampler): - """ - This sample can be used when working with a set of time series that have a - skewed distributions. For instance, if the dataset contains many time series - with small values and few with large values. - - The probability of sampling from bucket i is the inverse of its number of elements. - - Parameters - ---------- - scale_histogram - The histogram of scale for the time series. Here scale is the mean abs - value of the time series. - """ - - def __init__(self, scale_histogram: ScaleHistogram) -> None: - # probability of sampling a bucket i is the inverse of its number of - # elements - self.scale_histogram = scale_histogram - self.lookup = np.arange(2 ** 13) - - def __call__(self, ts: np.ndarray, a: int, b: int) -> None: - while ts.shape[-1] >= len(self.lookup): - self.lookup = np.arange(2 * len(self.lookup)) - p = 1.0 / self.scale_histogram.count(ts) - mask = np.random.uniform(low=0.0, high=1.0, size=b - a + 1) < p - indices = self.lookup[a : a + len(mask)][mask] - return indices - - -class ContinuousTimePointSampler(ABC): - """ - Abstract class for "continuous time" samplers, which, given a lower bound - and upper bound, sample "points" (events) in continuous time from a - specified interval. 
- """ - - def __init__(self, num_instances: int) -> None: - self.num_instances = num_instances - - @abstractmethod - def __call__(self, a: float, b: float) -> np.ndarray: - """ - Returns random points in the real interval between :code:`a` and - :code:`b`. - - Parameters - ---------- - a - The lower bound (minimum time value that a sampled point can take) - b - Upper bound. Must be greater than a. - """ - pass - - -class ContinuousTimeUniformSampler(ContinuousTimePointSampler): - """ - Implements a simple random sampler to sample points in the continuous - interval between :code:`a` and :code:`b`. - """ - - def __call__(self, a: float, b: float) -> np.ndarray: - assert a <= b, "Interval start time must be before interval end time." - return np.random.rand(self.num_instances) * (b - a) + a diff --git a/pts/transform/split.py b/pts/transform/split.py deleted file mode 100644 index 1912e41..0000000 --- a/pts/transform/split.py +++ /dev/null @@ -1,529 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -from functools import lru_cache -from typing import Iterator, List, Optional - -import numpy as np -import pandas as pd - -from pts.core.component import validated -from pts.dataset import DataEntry, FieldName -from .sampler import InstanceSampler, ContinuousTimePointSampler -from .transform import FlatMapTransformation - - -def shift_timestamp(ts: pd.Timestamp, offset: int) -> pd.Timestamp: - """ - Computes a shifted timestamp. 
- - Basic wrapping around pandas ``ts + offset`` with caching and exception - handling. - """ - return _shift_timestamp_helper(ts, ts.freq, offset) - - -@lru_cache(maxsize=10000) -def _shift_timestamp_helper(ts: pd.Timestamp, freq: str, offset: int) -> pd.Timestamp: - """ - We are using this helper function which explicitly uses the frequency as a - parameter, because the frequency is not included in the hash of a time - stamp. - - I.e. - pd.Timestamp(x, freq='1D') and pd.Timestamp(x, freq='1min') - - hash to the same value. - """ - try: - # this line looks innocent, but can create a date which is out of - # bounds values over year 9999 raise a ValueError - # values over 2262-04-11 raise a pandas OutOfBoundsDatetime - return ts + offset * ts.freq - except (ValueError, pd._libs.OutOfBoundsDatetime) as ex: - raise Exception(ex) - - -class InstanceSplitter(FlatMapTransformation): - """ - Selects training instances, by slicing the target and other time series - like arrays at random points in training mode or at the last time point in - prediction mode. Assumption is that all time like arrays start at the same - time point. - - The target and each time_series_field is removed and instead two - corresponding fields with prefix `past_` and `future_` are included. E.g. - - If the target array is one-dimensional, the resulting instance has shape - (len_target). In the multi-dimensional case, the instance has shape (dim, - len_target). - - target -> past_target and future_target - - The transformation also adds a field 'past_is_pad' that indicates whether - values where padded or not. - - Convention: time axis is always the last axis. 
- - Parameters - ---------- - - target_field - field containing the target - is_pad_field - output field indicating whether padding happened - start_field - field containing the start date of the time series - forecast_start_field - output field that will contain the time point where the forecast starts - train_sampler - instance sampler that provides sampling indices given a time-series - past_length - length of the target seen before making prediction - future_length - length of the target that must be predicted - time_first - whether to have time series output in (time, dimension) or in - (dimension, time) layout - time_series_fields - fields that contains time-series, they are split in the same interval - as the target - pick_incomplete - whether training examples can be sampled with only a part of - past_length time-units - present for the time series. This is useful to train models for - cold-start. In such case, is_pad_out contains an indicator whether - data is padded or not. - """ - - @validated() - def __init__( - self, - target_field: str, - is_pad_field: str, - start_field: str, - forecast_start_field: str, - train_sampler: InstanceSampler, - past_length: int, - future_length: int, - time_first: bool = True, - time_series_fields: Optional[List[str]] = None, - pick_incomplete: bool = True, - ) -> None: - - assert future_length > 0 - - self.train_sampler = train_sampler - self.past_length = past_length - self.future_length = future_length - self.time_first = time_first - self.ts_fields = time_series_fields if time_series_fields is not None else [] - self.target_field = target_field - self.is_pad_field = is_pad_field - self.start_field = start_field - self.forecast_start_field = forecast_start_field - self.pick_incomplete = pick_incomplete - - def _past(self, col_name): - return f"past_{col_name}" - - def _future(self, col_name): - return f"future_{col_name}" - - def flatmap_transform(self, data: DataEntry, is_train: bool) -> Iterator[DataEntry]: - pl = 
self.future_length - slice_cols = self.ts_fields + [self.target_field] - target = data[self.target_field] - - len_target = target.shape[-1] - - minimum_length = ( - self.future_length - if self.pick_incomplete - else self.past_length + self.future_length - ) - - if is_train: - sampling_bounds = ( - (0, len_target - self.future_length) - if self.pick_incomplete - else (self.past_length, len_target - self.future_length) - ) - - # We currently cannot handle time series that are - # too short during training, so we just skip these. - # If we want to include them we would need to pad and to - # mask the loss. - sampled_indices = ( - np.array([], dtype=int) - if len_target < minimum_length - else self.train_sampler(target, *sampling_bounds) - ) - else: - assert self.pick_incomplete or len_target >= self.past_length - sampled_indices = np.array([len_target], dtype=int) - for i in sampled_indices: - pad_length = max(self.past_length - i, 0) - if not self.pick_incomplete: - assert pad_length == 0, f"pad_length should be zero, got {pad_length}" - d = data.copy() - for ts_field in slice_cols: - if i > self.past_length: - # truncate to past_length - past_piece = d[ts_field][..., i - self.past_length : i] - elif i < self.past_length: - pad_block = np.zeros( - d[ts_field].shape[:-1] + (pad_length,), dtype=d[ts_field].dtype, - ) - past_piece = np.concatenate( - [pad_block, d[ts_field][..., :i]], axis=-1 - ) - else: - past_piece = d[ts_field][..., :i] - d[self._past(ts_field)] = past_piece - d[self._future(ts_field)] = d[ts_field][..., i : i + pl] - del d[ts_field] - pad_indicator = np.zeros(self.past_length) - if pad_length > 0: - pad_indicator[:pad_length] = 1 - - if self.time_first: - for ts_field in slice_cols: - d[self._past(ts_field)] = d[self._past(ts_field)].transpose() - d[self._future(ts_field)] = d[self._future(ts_field)].transpose() - - d[self._past(self.is_pad_field)] = pad_indicator - d[self.forecast_start_field] = shift_timestamp(d[self.start_field], i) - yield d - 
- -class CanonicalInstanceSplitter(FlatMapTransformation): - """ - Selects instances, by slicing the target and other time series - like arrays at random points in training mode or at the last time point in - prediction mode. Assumption is that all time like arrays start at the same - time point. - - In training mode, the returned instances contain past_`target_field` - as well as past_`time_series_fields`. - - In prediction mode, one can set `use_prediction_features` to get - future_`time_series_fields`. - - If the target array is one-dimensional, the `target_field` in the resulting instance has shape - (`instance_length`). In the multi-dimensional case, the instance has shape (`dim`, `instance_length`), - where `dim` can also take a value of 1. - - In the case of insufficient number of time series values, the - transformation also adds a field 'past_is_pad' that indicates whether - values where padded or not, and the value is padded with - `default_pad_value` with a default value 0. - This is done only if `allow_target_padding` is `True`, - and the length of `target` is smaller than `instance_length`. 
- - Parameters - ---------- - target_field - fields that contains time-series - is_pad_field - output field indicating whether padding happened - start_field - field containing the start date of the time series - forecast_start_field - field containing the forecast start date - instance_sampler - instance sampler that provides sampling indices given a time-series - instance_length - length of the target seen before making prediction - time_first - whether to have time series output in (time, dimension) or in - (dimension, time) layout - time_series_fields - fields that contains time-series, they are split in the same interval - as the target - allow_target_padding - flag to allow padding - pad_value - value to be used for padding - use_prediction_features - flag to indicate if prediction range features should be returned - prediction_length - length of the prediction range, must be set if - use_prediction_features is True - """ - - def __init__( - self, - target_field: str, - is_pad_field: str, - start_field: str, - forecast_start_field: str, - instance_sampler: InstanceSampler, - instance_length: int, - time_first: bool = True, - time_series_fields: List[str] = [], - allow_target_padding: bool = False, - pad_value: float = 0.0, - use_prediction_features: bool = False, - prediction_length: Optional[int] = None, - ) -> None: - self.instance_sampler = instance_sampler - self.instance_length = instance_length - self.time_first = time_first - self.dynamic_feature_fields = time_series_fields - self.target_field = target_field - self.allow_target_padding = allow_target_padding - self.pad_value = pad_value - self.is_pad_field = is_pad_field - self.start_field = start_field - self.forecast_start_field = forecast_start_field - - assert ( - not use_prediction_features or prediction_length is not None - ), "You must specify `prediction_length` if `use_prediction_features`" - - self.use_prediction_features = use_prediction_features - self.prediction_length = prediction_length 
- - def _past(self, col_name): - return f"past_{col_name}" - - def _future(self, col_name): - return f"future_{col_name}" - - def flatmap_transform(self, data: DataEntry, is_train: bool) -> Iterator[DataEntry]: - ts_fields = self.dynamic_feature_fields + [self.target_field] - ts_target = data[self.target_field] - - len_target = ts_target.shape[-1] - - if is_train: - if len_target < self.instance_length: - sampling_indices = ( - # Returning [] for all time series will cause this to be in loop forever! - [len_target] - if self.allow_target_padding - else [] - ) - else: - sampling_indices = self.instance_sampler( - ts_target, self.instance_length, len_target - ) - else: - sampling_indices = [len_target] - - for i in sampling_indices: - d = data.copy() - - pad_length = max(self.instance_length - i, 0) - - # update start field - d[self.start_field] = shift_timestamp( - data[self.start_field], i - self.instance_length - ) - - # set is_pad field - is_pad = np.zeros(self.instance_length) - if pad_length > 0: - is_pad[:pad_length] = 1 - d[self.is_pad_field] = is_pad - - # update time series fields - for ts_field in ts_fields: - full_ts = data[ts_field] - if pad_length > 0: - pad_pre = self.pad_value * np.ones( - shape=full_ts.shape[:-1] + (pad_length,) - ) - past_ts = np.concatenate([pad_pre, full_ts[..., :i]], axis=-1) - else: - past_ts = full_ts[..., (i - self.instance_length) : i] - - past_ts = past_ts.transpose() if self.time_first else past_ts - d[self._past(ts_field)] = past_ts - - if self.use_prediction_features and not is_train: - if not ts_field == self.target_field: - future_ts = full_ts[..., i : i + self.prediction_length] - future_ts = ( - future_ts.transpose() if self.time_first else future_ts - ) - d[self._future(ts_field)] = future_ts - - del d[ts_field] - - d[self.forecast_start_field] = shift_timestamp( - d[self.start_field], self.instance_length - ) - - yield d - - -class ContinuousTimeInstanceSplitter(FlatMapTransformation): - """ - Selects training 
instances by slicing "intervals" from a continuos-time - process instantiation. Concretely, the input data is expected to describe an - instantiation from a point (or jump) process, with the "target" - identifying inter-arrival times and other features (marks), as described - in detail below. - - The splitter will then take random points in continuous time from each - given observation, and return a (variable-length) array of points in - the past (context) and the future (prediction) intervals. - - The transformation is analogous to its discrete counterpart - `InstanceSplitter` except that - - - It does not allow "incomplete" records. That is, the past and future - intervals sampled are always complete - - Outputs a (T, C) layout. - - Does not accept `time_series_fields` (i.e., only accepts target fields) as these - would typically not be available in TPP data. - - The target arrays are expected to have (2, T) layout where the first axis - corresponds to the (i) interarrival times between consecutive points, in - order and (ii) integer identifiers of marks (from {0, 1, ..., :code:`num_marks`}). - The returned arrays will have (T, 2) layout. - - For example, the array below corresponds to a target array where points with timestamps - 0.5, 1.1, and 1.5 were observed belonging to categories (marks) 3, 1 and 0 - respectively: :code:`[[0.5, 0.6, 0.4], [3, 1, 0]]`. 
- - Parameters - ---------- - past_interval_length - length of the interval seen before making prediction - future_interval_length - length of the interval that must be predicted - train_sampler - instance sampler that provides sampling indices given a time-series - target_field - field containing the target - start_field - field containing the start date of the of the point process observation - end_field - field containing the end date of the point process observation - forecast_start_field - output field that will contain the time point where the forecast starts - """ - - def __init__( - self, - past_interval_length: float, - future_interval_length: float, - train_sampler: ContinuousTimePointSampler, - target_field: str = FieldName.TARGET, - start_field: str = FieldName.START, - end_field: str = "end", - forecast_start_field: str = FieldName.FORECAST_START, - ) -> None: - - assert ( - future_interval_length > 0 - ), "Prediction interval must have length greater than 0." - - self.train_sampler = train_sampler - self.past_interval_length = past_interval_length - self.future_interval_length = future_interval_length - self.target_field = target_field - self.start_field = start_field - self.end_field = end_field - self.forecast_start_field = forecast_start_field - - # noinspection PyMethodMayBeStatic - def _mask_sorted(self, a: np.ndarray, lb: float, ub: float): - start = np.searchsorted(a, lb) - end = np.searchsorted(a, ub) - return np.arange(start, end) - - def flatmap_transform(self, data: DataEntry, is_train: bool) -> Iterator[DataEntry]: - - assert data[self.start_field].freq == data[self.end_field].freq - - total_interval_length = (data[self.end_field] - data[self.start_field]) / data[ - self.start_field - ].freq.delta - - # sample forecast start times in continuous time - if is_train: - if total_interval_length < ( - self.future_interval_length + self.past_interval_length - ): - sampling_times: np.ndarray = np.array([]) - else: - sampling_times = 
self.train_sampler( - self.past_interval_length, - total_interval_length - self.future_interval_length, - ) - else: - sampling_times = np.array([total_interval_length]) - - ia_times = data[self.target_field][0, :] - marks = data[self.target_field][1:, :] - - ts = np.cumsum(ia_times) - assert ts[-1] < total_interval_length, ( - "Target interarrival times provided are inconsistent with " - "start and end timestamps." - ) - - # select field names that will be included in outputs - keep_cols = { - k: v - for k, v in data.items() - if k not in [self.target_field, self.start_field, self.end_field] - } - - for future_start in sampling_times: - - r: DataEntry = dict() - - past_start = future_start - self.past_interval_length - future_end = future_start + self.future_interval_length - - assert past_start >= 0 - - past_mask = self._mask_sorted(ts, past_start, future_start) - - past_ia_times = np.diff(np.r_[0, ts[past_mask] - past_start])[np.newaxis] - - r[f"past_{self.target_field}"] = np.concatenate( - [past_ia_times, marks[:, past_mask]], axis=0 - ).transpose() - - r["past_valid_length"] = np.array([len(past_mask)]) - - r[self.forecast_start_field] = ( - data[self.start_field] - + data[self.start_field].freq.delta * future_start - ) - - if is_train: # include the future only if is_train - assert future_end <= total_interval_length - - future_mask = self._mask_sorted(ts, future_start, future_end) - - future_ia_times = np.diff(np.r_[0, ts[future_mask] - future_start])[ - np.newaxis - ] - - r[f"future_{self.target_field}"] = np.concatenate( - [future_ia_times, marks[:, future_mask]], axis=0 - ).transpose() - - r["future_valid_length"] = np.array([len(future_mask)]) - - # include other fields - r.update(keep_cols.copy()) - - yield r diff --git a/pts/transform/transform.py b/pts/transform/transform.py index 499b8c9..7d7eb13 100644 --- a/pts/transform/transform.py +++ b/pts/transform/transform.py @@ -16,7 +16,7 @@ from abc import ABC, abstractmethod from functools import reduce 
from typing import Callable, Iterator, Iterable, List -from pts.core.component import validated +from gluonts.core.component import validated from pts.dataset import DataEntry MAX_IDLE_TRANSFORMS = 100 @@ -43,6 +43,7 @@ class Chain(Transformation): """ Chain multiple transformations together. """ + @validated() def __init__(self, trans: List[Transformation]) -> None: self.transformations = [] diff --git a/setup.py b/setup.py index 7435167..06b71dc 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,8 @@ setup( zip_safe=True, python_requires=">=3.6", install_requires = [ - 'torch>=1.5.0', + 'torch>=1.7.0', + 'gluonts>=0.6.4', 'holidays', 'numpy', 'pandas>=1.1', @@ -24,7 +25,6 @@ setup( 'tqdm', 'pydantic', 'matplotlib', - 'python-rapidjson', 'tensorboard', ], diff --git a/test/dataset/test_common.py b/test/dataset/test_common.py deleted file mode 100644 index a11ff9b..0000000 --- a/test/dataset/test_common.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -# First-party imports -from pts.dataset import FieldName - - -def test_dataset_fields(): - assert ( - "feat_static_cat" == FieldName.FEAT_STATIC_CAT - ), "Error in the FieldName 'feat_static_cat'." - assert ( - "feat_static_real" == FieldName.FEAT_STATIC_REAL - ), "Error in the FieldName 'feat_static_real'." - assert ( - "feat_dynamic_cat" == FieldName.FEAT_DYNAMIC_CAT - ), "Error in the FieldName 'feat_dynamic_cat'." 
- assert ( - "feat_dynamic_real" == FieldName.FEAT_DYNAMIC_REAL - ), "Error in the FieldName 'feat_dynamic_real'." diff --git a/test/dataset/test_multivariate_grouper.py b/test/dataset/test_multivariate_grouper.py deleted file mode 100644 index 3f73a0b..0000000 --- a/test/dataset/test_multivariate_grouper.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -import numpy as np - -# Standard library imports -import pytest - -# First-party imports -from pts.dataset import ListDataset, MultivariateGrouper - -UNIVARIATE_TS = [ - [ - {"start": "2014-09-07", "target": [1, 2, 3, 4]}, - {"start": "2014-09-07", "target": [5, 6, 7, 8]}, - ], - [ - {"start": "2014-09-07", "target": [1, 2, 3, 4]}, - {"start": "2014-09-08", "target": [5, 6, 7, 8]}, - ], - [ - {"start": "2014-09-07", "target": [1, 2, 3, 4]}, - {"start": "2014-09-07", "target": [0]}, - ], - [ - {"start": "2014-09-07", "target": [1, 2, 3, 4]}, - {"start": "2014-09-01", "target": [0]}, - ], - [ - {"start": "2014-09-07", "target": [1, 2, 3, 4]}, - {"start": "2014-09-08", "target": [5, 6, 7, 8]}, - ], -] - -MULTIVARIATE_TS = [ - [{"start": "2014-09-07", "target": [[1, 2, 3, 4], [5, 6, 7, 8]]}], - [{"start": "2014-09-07", "target": [[1, 2, 3, 4, 2.5], [6.5, 5, 6, 7, 8]],}], - [{"start": "2014-09-07", "target": [[1, 2, 3, 4], [0, 0, 0, 0]]}], - [ - { - "start": "2014-09-01", - "target": [ - [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 1, 2, 3, 4], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 
0], - ], - } - ], - [{"start": "2014-09-07", "target": [[1, 2, 3, 4, 0], [0, 5, 6, 7, 8]]}], -] - -TRAIN_FILL_RULE = [np.mean, np.mean, np.mean, np.mean, lambda x: 0.0] - - -@pytest.mark.parametrize( - "univariate_ts, multivariate_ts, train_fill_rule", - zip(UNIVARIATE_TS, MULTIVARIATE_TS, TRAIN_FILL_RULE), -) -def test_multivariate_grouper_train( - univariate_ts, multivariate_ts, train_fill_rule -) -> None: - univariate_ds = ListDataset(univariate_ts, freq="1D") - multivariate_ds = ListDataset(multivariate_ts, freq="1D", one_dim_target=False) - - grouper = MultivariateGrouper(train_fill_rule=train_fill_rule) - assert ( - list(grouper(univariate_ds))[0]["target"] == list(multivariate_ds)[0]["target"] - ).all() - - assert list(grouper(univariate_ds))[0]["start"] == list(multivariate_ds)[0]["start"] - - -UNIVARIATE_TS_TEST = [ - [ - {"start": "2014-09-07", "target": [1, 2, 3, 4]}, - {"start": "2014-09-07", "target": [5, 6, 7, 8]}, - {"start": "2014-09-08", "target": [0, 1, 2, 3]}, - {"start": "2014-09-08", "target": [4, 5, 6, 7]}, - ], - [ - {"start": "2014-09-07", "target": [1, 2, 3, 4]}, - {"start": "2014-09-07", "target": [5, 6, 7, 8]}, - {"start": "2014-09-08", "target": [0, 1, 2, 3]}, - {"start": "2014-09-08", "target": [4, 5, 6, 7]}, - ], -] - -MULTIVARIATE_TS_TEST = [ - [ - {"start": "2014-09-07", "target": [[1, 2, 3, 4], [5, 6, 7, 8]]}, - {"start": "2014-09-07", "target": [[0, 0, 1, 2, 3], [0, 4, 5, 6, 7]]}, - ], - [ - {"start": "2014-09-07", "target": [[5, 6, 7, 8]]}, - {"start": "2014-09-07", "target": [[0, 4, 5, 6, 7]]}, - ], -] - -TEST_FILL_RULE = [lambda x: 0.0, lambda x: 0.0] -MAX_TARGET_DIM = [2, 1] - - -@pytest.mark.parametrize( - "univariate_ts, multivariate_ts, test_fill_rule, max_target_dim", - zip(UNIVARIATE_TS_TEST, MULTIVARIATE_TS_TEST, TEST_FILL_RULE, MAX_TARGET_DIM,), -) -def test_multivariate_grouper_test( - univariate_ts, multivariate_ts, test_fill_rule, max_target_dim -) -> None: - univariate_ds = ListDataset(univariate_ts, freq="1D") - 
multivariate_ds = ListDataset(multivariate_ts, freq="1D", one_dim_target=False) - - grouper = MultivariateGrouper( - test_fill_rule=test_fill_rule, num_test_dates=2, max_target_dim=max_target_dim, - ) - - for grouped_data, multivariate_data in zip(grouper(univariate_ds), multivariate_ds): - assert (grouped_data["target"] == multivariate_data["target"]).all() - - assert grouped_data["start"] == multivariate_data["start"] diff --git a/test/dataset/test_process.py b/test/dataset/test_process.py deleted file mode 100644 index 21996d9..0000000 --- a/test/dataset/test_process.py +++ /dev/null @@ -1,21 +0,0 @@ -import pandas as pd -import pytest - -from pts.dataset import ProcessStartField - - -@pytest.mark.parametrize( - "freq, expected", - [ - ("B", "2019-11-01"), - ("W", "2019-11-03"), - ("M", "2019-11-30"), - ("12M", "2019-11-30"), - ("A-DEC", "2019-12-31"), - ], -) -def test_process_start_field(freq, expected): - process = ProcessStartField.process - given = "2019-11-01 12:34:56" - - assert process(given, freq) == pd.Timestamp(expected, freq) diff --git a/test/dataset/test_stat.py b/test/dataset/test_stat.py deleted file mode 100644 index 88d5d68..0000000 --- a/test/dataset/test_stat.py +++ /dev/null @@ -1,340 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- -# Standard library imports -import unittest -from typing import cast - -# Third-party imports -import numpy as np -import pandas as pd - -# First-party imports -from pts.dataset import DataEntry, Dataset -from pts.dataset.stat import ( - DatasetStatistics, - ScaleHistogram, - calculate_dataset_statistics, -) - - -def make_dummy_dynamic_feat(target, num_features) -> np.ndarray: - # gives dummy dynamic_feat constructed from the target - return np.vstack([target * (i + 1) for i in range(num_features)]) - - -# default values for TimeSeries field -start = pd.Timestamp("1985-01-02", freq="1D") -target = np.random.randint(0, 10, 20) -fsc = [0, 1] -fsr = [0.1, 0.2] - - -def make_time_series( - start=start, - target=target, - feat_static_cat=fsc, - feat_static_real=fsr, - num_feat_dynamic_cat=1, - num_feat_dynamic_real=1, -) -> DataEntry: - feat_dynamic_cat = ( - make_dummy_dynamic_feat(target, num_feat_dynamic_cat).astype("int64") - if num_feat_dynamic_cat > 0 - else None - ) - feat_dynamic_real = ( - make_dummy_dynamic_feat(target, num_feat_dynamic_real).astype("float") - if num_feat_dynamic_real > 0 - else None - ) - data = { - "start": start, - "target": target, - "feat_static_cat": feat_static_cat, - "feat_static_real": feat_static_real, - "feat_dynamic_cat": feat_dynamic_cat, - "feat_dynamic_real": feat_dynamic_real, - } - return data - - -def ts( - start, - target, - feat_static_cat=None, - feat_static_real=None, - feat_dynamic_cat=None, - feat_dynamic_real=None, -) -> DataEntry: - d = {"start": start, "target": target} - if feat_static_cat is not None: - d["feat_static_cat"] = feat_static_cat - if feat_static_real is not None: - d["feat_static_real"] = feat_static_real - if feat_dynamic_cat is not None: - d["feat_dynamic_cat"] = feat_dynamic_cat - if feat_dynamic_real is not None: - d["feat_dynamic_real"] = feat_dynamic_real - return d - - -class DatasetStatisticsTest(unittest.TestCase): - def test_dataset_statistics(self) -> None: - - n = 2 - T = 10 - - # use 
integers to avoid float conversion that can fail comparison - np.random.seed(0) - targets = np.random.randint(0, 10, (n, T)) - - scale_histogram = ScaleHistogram() - for i in range(n): - scale_histogram.add(targets[i, :]) - - scale_histogram.add([]) - - expected = DatasetStatistics( - integer_dataset=True, - num_time_series=n + 1, - num_time_observations=targets.size, - mean_target_length=T * 2 / 3, - min_target=targets.min(), - mean_target=targets.mean(), - mean_abs_target=targets.mean(), - max_target=targets.max(), - feat_static_real=[{0.1}, {0.2, 0.3}], - feat_static_cat=[{1}, {2, 3}], - num_feat_dynamic_real=2, - num_feat_dynamic_cat=2, - num_missing_values=0, - scale_histogram=scale_histogram, - ) - - # FIXME: the cast below is a hack to make mypy happy - timeseries = cast( - Dataset, - [ - make_time_series( - target=targets[0, :], - feat_static_cat=[1, 2], - feat_static_real=[0.1, 0.2], - num_feat_dynamic_cat=2, - num_feat_dynamic_real=2, - ), - make_time_series( - target=targets[1, :], - feat_static_cat=[1, 3], - feat_static_real=[0.1, 0.3], - num_feat_dynamic_cat=2, - num_feat_dynamic_real=2, - ), - make_time_series( - target=np.array([]), - feat_static_cat=[1, 3], - feat_static_real=[0.1, 0.3], - num_feat_dynamic_cat=2, - num_feat_dynamic_real=2, - ), - ], - ) - - found = calculate_dataset_statistics(timeseries) - - assert expected == found - - def test_dataset_histogram(self) -> None: - - # generates 2 ** N - 1 timeseries with constant increasing values - N = 6 - n = 2 ** N - 1 - T = 5 - targets = np.ones((n, T)) - for i in range(0, n): - targets[i, :] = targets[i, :] * i - - # FIXME: the cast below is a hack to make mypy happy - timeseries = cast( - Dataset, [make_time_series(target=targets[i, :]) for i in range(n)] - ) - - found = calculate_dataset_statistics(timeseries) - - hist = found.scale_histogram.bin_counts - for i in range(0, N): - assert i in hist - assert hist[i] == 2 ** i - - -class DatasetStatisticsExceptions(unittest.TestCase): - def 
test_dataset_statistics_exceptions(self) -> None: - def check_error_message(expected_regex, dataset) -> None: - with self.assertRaisesRegex(Exception, expected_regex): - calculate_dataset_statistics(dataset) - - check_error_message("Time series dataset is empty!", []) - - check_error_message( - "Only empty time series found in the dataset!", - [make_time_series(target=np.random.randint(0, 10, 0))], - ) - - # infinite target - # check_error_message( - # "Target values have to be finite (e.g., not inf, -inf, " - # "or None) and cannot exceed single precision floating " - # "point range.", - # [make_time_series(target=np.full(20, np.inf))] - # ) - - # different number of feat_dynamic_{cat, real} - check_error_message( - "Found instances with different number of features in " - "feat_dynamic_cat, found one with 2 and another with 1.", - [ - make_time_series(num_feat_dynamic_cat=2), - make_time_series(num_feat_dynamic_cat=1), - ], - ) - check_error_message( - "Found instances with different number of features in " - "feat_dynamic_cat, found one with 0 and another with 1.", - [ - make_time_series(num_feat_dynamic_cat=0), - make_time_series(num_feat_dynamic_cat=1), - ], - ) - check_error_message( - "feat_dynamic_cat was found for some instances but not others.", - [ - make_time_series(num_feat_dynamic_cat=1), - make_time_series(num_feat_dynamic_cat=0), - ], - ) - check_error_message( - "Found instances with different number of features in " - "feat_dynamic_real, found one with 2 and another with 1.", - [ - make_time_series(num_feat_dynamic_real=2), - make_time_series(num_feat_dynamic_real=1), - ], - ) - check_error_message( - "Found instances with different number of features in " - "feat_dynamic_real, found one with 0 and another with 1.", - [ - make_time_series(num_feat_dynamic_real=0), - make_time_series(num_feat_dynamic_real=1), - ], - ) - check_error_message( - "feat_dynamic_real was found for some instances but not others.", - [ - 
make_time_series(num_feat_dynamic_real=1), - make_time_series(num_feat_dynamic_real=0), - ], - ) - - # infinite feat_dynamic_{cat,real} - inf_dynamic_feat = np.full((2, len(target)), np.inf) - check_error_message( - "Features values have to be finite and cannot exceed single " - "precision floating point range.", - [ - ts( - start, - target, - feat_dynamic_cat=inf_dynamic_feat, - feat_static_cat=[0, 1], - ) - ], - ) - check_error_message( - "Features values have to be finite and cannot exceed single " - "precision floating point range.", - [ - ts( - start, - target, - feat_dynamic_real=inf_dynamic_feat, - feat_static_cat=[0, 1], - ) - ], - ) - - # feat_dynamic_{cat, real} different length from target - check_error_message( - "Each feature in feat_dynamic_cat has to have the same length as the " - "target. Found an instance with feat_dynamic_cat of length 1 and a " - "target of length 20.", - [ - ts( - start=start, - target=target, - feat_static_cat=[0, 1], - feat_dynamic_cat=np.ones((1, 1)), - ) - ], - ) - check_error_message( - "Each feature in feat_dynamic_real has to have the same length as the " - "target. 
Found an instance with feat_dynamic_real of length 1 and a " - "target of length 20.", - [ - ts( - start=start, - target=target, - feat_static_cat=[0, 1], - feat_dynamic_real=np.ones((1, 1)), - ) - ], - ) - - # feat_static_{cat, real} different length - check_error_message( - "Not all feat_static_cat vectors have the same length 2 != 1.", - [ - ts(start=start, target=target, feat_static_cat=[0, 1]), - ts(start=start, target=target, feat_static_cat=[1]), - ], - ) - check_error_message( - "Not all feat_static_real vectors have the same length 2 != 1.", - [ - ts(start=start, target=target, feat_static_real=[0, 1]), - ts(start=start, target=target, feat_static_real=[1]), - ], - ) - - calculate_dataset_statistics( - # FIXME: the cast below is a hack to make mypy happy - cast( - Dataset, - [ - make_time_series(num_feat_dynamic_cat=2), - make_time_series(num_feat_dynamic_cat=2), - ], - ) - ) - - calculate_dataset_statistics( - # FIXME: the cast below is a hack to make mypy happy - cast( - Dataset, - [ - make_time_series(num_feat_dynamic_cat=0), - make_time_series(num_feat_dynamic_cat=0), - ], - ) - ) diff --git a/test/evaluation/test_evaluator.py b/test/evaluation/test_evaluator.py deleted file mode 100644 index c9cdb21..0000000 --- a/test/evaluation/test_evaluator.py +++ /dev/null @@ -1,649 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. 
- -# Third-party imports -import numpy as np -import pandas as pd -import pytest - -# First-party imports -from pts.evaluation import ( - Evaluator, - MultivariateEvaluator, -) -from pts.feature import get_seasonality -from pts.model.forecast import QuantileForecast, SampleForecast - -QUANTILES = [str(q / 10.0) for q in range(1, 10)] - - -def data_iterator(ts): - """ - :param ts: list of pd.Series or pd.DataFrame - :return: - """ - for i in range(len(ts)): - yield ts[i] - - -def fcst_iterator(fcst, start_dates, freq): - """ - :param fcst: list of numpy arrays with the sample paths - :return: - """ - for i in range(len(fcst)): - yield SampleForecast(samples=fcst[i], start_date=start_dates[i], freq=freq) - - -def iterator(it): - """ - Convenience function to toggle whether to consume dataset and forecasts as iterators or iterables. - :param it: - :return: it (as iterator) - """ - return iter(it) - - -def iterable(it): - """ - Convenience function to toggle whether to consume dataset and forecasts as iterators or iterables. - :param it: - :return: it (as iterable) - """ - return list(it) - - -def naive_forecaster(ts, prediction_length, num_samples=100, target_dim=0): - """ - :param ts: pandas.Series - :param prediction_length: - :param num_samples: number of sample paths - :param target_dim: number of axes of target (0: scalar, 1: array, ...) 
- :return: np.array with dimension (num_samples, prediction_length) - """ - - # naive prediction: last observed value - naive_pred = ts.values[-prediction_length - 1] - assert len(naive_pred.shape) == target_dim - return np.tile( - naive_pred, - (num_samples, prediction_length) + tuple(1 for _ in range(target_dim)), - ) - - -def naive_multivariate_forecaster(ts, prediction_length, num_samples=100): - return naive_forecaster(ts, prediction_length, num_samples, target_dim=1) - - -def calculate_metrics( - timeseries, - evaluator, - ts_datastructure, - has_nans=False, - forecaster=naive_forecaster, - input_type=iterator, -): - num_timeseries = timeseries.shape[0] - num_timestamps = timeseries.shape[1] - - if has_nans: - timeseries[0, 1] = np.nan - timeseries[0, 7] = np.nan - - num_samples = 100 - prediction_length = 3 - freq = "1D" - - ts_start_dates = ( - [] - ) # starting date of each time series - can be different in general - pd_timeseries = [] # list of pandas.DataFrame - samples = [] # list of forecast samples - start_dates = [] # start date of the prediction range - for i in range(num_timeseries): - ts_start_dates.append(pd.Timestamp(year=2018, month=1, day=1, hour=1)) - index = pd.date_range(ts_start_dates[i], periods=num_timestamps, freq=freq) - - pd_timeseries.append(ts_datastructure(timeseries[i], index=index)) - samples.append(forecaster(pd_timeseries[i], prediction_length, num_samples)) - start_dates.append( - pd.date_range(ts_start_dates[i], periods=num_timestamps, freq=freq)[ - -prediction_length - ] - ) - - # data iterator - data_iter = input_type(data_iterator(pd_timeseries)) - fcst_iter = input_type(fcst_iterator(samples, start_dates, freq)) - - # evaluate - agg_df, item_df = evaluator(data_iter, fcst_iter) - return agg_df, item_df - - -TIMESERIES_M4 = [ - np.array( - [ - [ - 2.943_013, - 2.822_251, - 4.196_222, - 1.328_664, - 4.947_390, - 3.333_131, - 1.479_800, - 2.265_094, - 3.413_493, - 3.497_607, - ], - [ - -0.126_781_2, - 3.057_412_2, - 
1.901_594_4, - 2.772_549_5, - 3.312_853_1, - 4.411_818_0, - 3.709_025_2, - 4.322_028, - 2.565_359, - 3.074_308, - ], - [ - 2.542_998, - 2.336_757, - 1.417_916, - 1.335_139, - 2.523_035, - 3.645_589, - 3.382_819, - 2.075_960, - 2.643_869, - 2.772_456, - ], - [ - 0.315_685_6, - 1.892_312_1, - 2.476_861_2, - 3.511_628_6, - 4.384_346_5, - 2.960_685_6, - 4.897_572_5, - 3.280_125, - 4.768_556, - 4.958_616, - ], - [ - 2.205_877_3, - 0.782_759_4, - 2.401_420_8, - 2.385_643_4, - 4.845_818_2, - 3.102_322_9, - 3.567_723_7, - 4.878_143, - 3.735_245, - 2.218_113, - ], - ] - ), - np.array( - [ - [ - 13.11301, - 13.16225, - 14.70622, - 12.00866, - 15.79739, - 14.35313, - 12.66980, - 13.62509, - 14.94349, - 15.19761, - ], - [ - 10.04322, - 13.39741, - 12.41159, - 13.45255, - 14.16285, - 15.43182, - 14.89903, - 15.68203, - 14.09536, - 14.77431, - ], - [ - 12.71300, - 12.67676, - 11.92792, - 12.01514, - 13.37303, - 14.66559, - 14.57282, - 13.43596, - 14.17387, - 14.47246, - ], - [ - 10.48569, - 12.23231, - 12.98686, - 14.19163, - 15.23435, - 13.98069, - 16.08757, - 14.64012, - 16.29856, - 16.65862, - ], - [ - 12.37588, - 11.12276, - 12.91142, - 13.06564, - 15.69582, - 14.12232, - 14.75772, - 16.23814, - 15.26524, - 13.91811, - ], - ] - ), -] - -RES_M4 = [ - { - "MASE": 0.816_837_618, - "MAPE": 0.324_517_430_685_928_1, - "sMAPE": 0.326_973_268_4, - "seasonal_error": np.array( - [1.908_101, 1.258_838, 0.63018, 1.238_201, 1.287_771] - ), - }, - { - "MASE": 0.723_948_2, - "MAPE": 0.063_634_129_851_747_6, - "sMAPE": 0.065_310_85, - "seasonal_error": np.array( - [1.867_847, 1.315_505, 0.602_587_4, 1.351_535, 1.339_179] - ), - }, -] - - -@pytest.mark.parametrize("timeseries, res", zip(TIMESERIES_M4, RES_M4)) -def test_MASE_sMAPE_M4(timeseries, res): - ts_datastructure = pd.Series - evaluator = Evaluator(quantiles=QUANTILES) - agg_df, item_df = calculate_metrics(timeseries, evaluator, ts_datastructure) - - assert abs((agg_df["MASE"] - res["MASE"]) / res["MASE"]) < 0.001, ( - "Scores for the 
metric MASE do not match: " - "\nexpected: {} \nobtained: {}".format(res["MASE"], agg_df["MASE"]) - ) - assert abs((agg_df["MAPE"] - res["MAPE"]) / res["MAPE"]) < 0.001, ( - "Scores for the metric MAPE do not match: \nexpected: {} " - "\nobtained: {}".format(res["MAPE"], agg_df["MAPE"]) - ) - assert abs((agg_df["sMAPE"] - res["sMAPE"]) / res["sMAPE"]) < 0.001, ( - "Scores for the metric sMAPE do not match: \nexpected: {} " - "\nobtained: {}".format(res["sMAPE"], agg_df["sMAPE"]) - ) - assert sum(abs(item_df["seasonal_error"].values - res["seasonal_error"])) < 0.001, ( - "Scores for the metric seasonal_error do not match: \nexpected: {} " - "\nobtained: {}".format(res["seasonal_error"], item_df["seasonal_error"].values) - ) - - -TIMESERIES = [ - np.ones((5, 10), dtype=np.float64), - np.ones((5, 10), dtype=np.float64), - np.arange(0, 50, dtype=np.float64).reshape(5, 10), - np.arange(0, 50, dtype=np.float64).reshape(5, 10), - np.array([[np.nan] * 10, [1.0] * 10]), -] - -RES = [ - { - "MSE": 0.0, - "abs_error": 0.0, - "abs_target_sum": 15.0, - "abs_target_mean": 1.0, - "seasonal_error": 0.0, - "MASE": 0.0, - "MAPE": 0.0, - "sMAPE": 0.0, - "MSIS": 0.0, - "RMSE": 0.0, - "NRMSE": 0.0, - "ND": 0.0, - "MAE_Coverage": 0.5, - }, - { - "MSE": 0.0, - "abs_error": 0.0, - "abs_target_sum": 14.0, - "abs_target_mean": 1.0, - "seasonal_error": 0.0, - "MASE": 0.0, - "MAPE": 0.0, - "sMAPE": 0.0, - "MSIS": 0.0, - "RMSE": 0.0, - "NRMSE": 0.0, - "ND": 0.0, - "MAE_Coverage": 0.5, - }, - { - "MSE": 4.666_666_666_666, - "abs_error": 30.0, - "abs_target_sum": 420.0, - "abs_target_mean": 28.0, - "seasonal_error": 1.0, - "MASE": 2.0, - "MAPE": 0.103_112_211_532_524_85, - "sMAPE": 0.113_254_049_3, - "MSIS": 80.0, - "RMSE": 2.160_246_899_469_286_9, - "NRMSE": 0.077_151_674_981_045_956, - "ND": 0.071_428_571_428_571_42, - "MAE_Coverage": 0.5, - }, - { - "MSE": 5.033_333_333_333_3, - "abs_error": 29.0, - "abs_target_sum": 413.0, - "abs_target_mean": 28.1, - "seasonal_error": 1.0, - "MASE": 2.1, - 
"MAPE": 0.113_032_846_453_159_77, - "sMAPE": 0.125_854_781_903_299_57, - "MSIS": 84.0, - "RMSE": 2.243_509_156_061_845_6, - "NRMSE": 0.079_840_183_489_745_39, - "ND": 0.070_217_917_675_544_79, - "MAE_Coverage": 0.5, - }, - { - "MSE": 0.0, - "abs_error": 0.0, - "abs_target_sum": 3.0, - "abs_target_mean": 1.0, - "seasonal_error": 0.0, - "MASE": 0.0, - "MAPE": 0.0, - "sMAPE": 0.0, - "MSIS": 0.0, - "RMSE": 0.0, - "NRMSE": 0.0, - "ND": 0.0, - "MAE_Coverage": 0.5, - }, -] - -HAS_NANS = [False, True, False, True, True] - - -INPUT_TYPE = [iterable, iterable, iterator, iterator, iterable] - - -@pytest.mark.parametrize( - "timeseries, res, has_nans, input_type", zip(TIMESERIES, RES, HAS_NANS, INPUT_TYPE), -) -def test_metrics(timeseries, res, has_nans, input_type): - ts_datastructure = pd.Series - evaluator = Evaluator(quantiles=QUANTILES, num_workers=0) - agg_metrics, item_metrics = calculate_metrics( - timeseries, - evaluator, - ts_datastructure, - has_nans=has_nans, - input_type=input_type, - ) - - for metric, score in agg_metrics.items(): - if metric in res.keys(): - assert abs(score - res[metric]) < 0.001, ( - "Scores for the metric {} do not match: \nexpected: {} " - "\nobtained: {}".format(metric, res[metric], score) - ) - - -@pytest.mark.parametrize( - "timeseries, res, has_nans, input_type", zip(TIMESERIES, RES, HAS_NANS, INPUT_TYPE), -) -def test_metrics_mp(timeseries, res, has_nans, input_type): - ts_datastructure = pd.Series - # Default will be multiprocessing evaluator - evaluator = Evaluator(quantiles=QUANTILES, num_workers=4) - agg_metrics, item_metrics = calculate_metrics( - timeseries, - evaluator, - ts_datastructure, - has_nans=has_nans, - input_type=input_type, - ) - - for metric, score in agg_metrics.items(): - if metric in res.keys(): - assert abs(score - res[metric]) < 0.001, ( - "Scores for the metric {} do not match: \nexpected: {} " - "\nobtained: {}".format(metric, res[metric], score) - ) - - -TIMESERIES_MULTIVARIATE = [ - np.ones((5, 10, 2), 
dtype=np.float64), - np.ones((5, 10, 2), dtype=np.float64), - np.ones((5, 10, 2), dtype=np.float64), - np.stack( - ( - np.arange(0, 50, dtype=np.float64).reshape(5, 10), - np.arange(50, 100, dtype=np.float64).reshape(5, 10), - ), - axis=2, - ), - np.stack( - ( - np.arange(0, 50, dtype=np.float64).reshape(5, 10), - np.arange(50, 100, dtype=np.float64).reshape(5, 10), - ), - axis=2, - ), - np.stack( - ( - np.arange(0, 50, dtype=np.float64).reshape(5, 10), - np.arange(50, 100, dtype=np.float64).reshape(5, 10), - ), - axis=2, - ), -] - -RES_MULTIVARIATE = [ - { - "MSE": 0.0, - "0_MSE": 0.0, - "1_MSE": 0.0, - "abs_error": 0.0, - "abs_target_sum": 15.0, - "abs_target_mean": 1.0, - "seasonal_error": 0.0, - "MASE": 0.0, - "sMAPE": 0.0, - "MSIS": 0.0, - "RMSE": 0.0, - "NRMSE": 0.0, - "ND": 0.0, - "MAE_Coverage": 0.5, - "m_sum_MSE": 0.0, - }, - { - "MSE": 0.0, - "abs_error": 0.0, - "abs_target_sum": 15.0, - "abs_target_mean": 1.0, - "seasonal_error": 0.0, - "MASE": 0.0, - "sMAPE": 0.0, - "MSIS": 0.0, - "RMSE": 0.0, - "NRMSE": 0.0, - "ND": 0.0, - "MAE_Coverage": 0.5, - "m_sum_MSE": 0.0, - }, - { - "MSE": 0.0, - "abs_error": 0.0, - "abs_target_sum": 30.0, - "abs_target_mean": 1.0, - "seasonal_error": 0.0, - "MASE": 0.0, - "sMAPE": 0.0, - "MSIS": 0.0, - "RMSE": 0.0, - "NRMSE": 0.0, - "ND": 0.0, - "MAE_Coverage": 0.5, - "m_sum_MSE": 0.0, - }, - { - "MSE": 4.666_666_666_666, - "abs_error": 30.0, - "abs_target_sum": 420.0, - "abs_target_mean": 28.0, - "seasonal_error": 1.0, - "MASE": 2.0, - "sMAPE": 0.113_254_049_3, - "MSIS": 80.0, - "RMSE": 2.160_246_899_469_286_9, - "NRMSE": 0.077_151_674_981_045_956, - "ND": 0.071_428_571_428_571_42, - "MAE_Coverage": 0.5, - "m_sum_MSE": 18.666_666_666_666, - }, - { - "MSE": 4.666_666_666_666, - "abs_error": 30.0, - "abs_target_sum": 1170.0, - "abs_target_mean": 78.0, - "seasonal_error": 1.0, - "MASE": 2.0, - "sMAPE": 0.026_842_301_756_499_45, - "MSIS": 80.0, - "RMSE": 2.160_246_899_469_286_9, - "NRMSE": 0.027_695_473_070_119_065, - "ND": 
0.025_641_025_641_025_64, - "MAE_Coverage": 0.5, - "m_sum_MSE": 18.666_666_666_666, - }, - { - "MSE": 4.666_666_666_666, - "abs_error": 60.0, - "abs_target_sum": 1590.0, - "abs_target_mean": 53.0, - "seasonal_error": 1.0, - "MASE": 2.0, - "sMAPE": 0.070_048_175_528_249_73, - "MSIS": 80.0, - "RMSE": 2.160_246_899_469_286_9, - "NRMSE": 0.040_759_375_461_684_65, - "ND": 0.037_735_849_056_603_77, - "MAE_Coverage": 0.5, - "m_sum_MSE": 18.666_666_666_666, - }, -] - -HAS_NANS_MULTIVARIATE = [False, False, False, False, False, False] - -EVAL_DIMS = [[0], [1], [0, 1], [0], [1], None] - -INPUT_TYPE = [iterable, iterable, iterator, iterator, iterable, iterator] - - -@pytest.mark.parametrize( - "timeseries, res, has_nans, eval_dims, input_type", - zip( - TIMESERIES_MULTIVARIATE, - RES_MULTIVARIATE, - HAS_NANS_MULTIVARIATE, - EVAL_DIMS, - INPUT_TYPE, - ), -) -def test_metrics_multivariate(timeseries, res, has_nans, eval_dims, input_type): - ts_datastructure = pd.DataFrame - evaluator = MultivariateEvaluator( - quantiles=QUANTILES, eval_dims=eval_dims, target_agg_funcs={"sum": np.sum}, - ) - - agg_metrics, item_metrics = calculate_metrics( - timeseries, - evaluator, - ts_datastructure, - has_nans=has_nans, - forecaster=naive_multivariate_forecaster, - input_type=input_type, - ) - - for metric, score in agg_metrics.items(): - if metric in res.keys(): - assert abs(score - res[metric]) < 0.001, ( - "Scores for the metric {} do not match: \nexpected: {} " - "\nobtained: {}".format(metric, res[metric], score) - ) - - -def test_evaluation_with_QuantileForecast(): - start = "2012-01-11" - target = [2.4, 1.0, 3.0, 4.4, 5.5, 4.9] * 11 - index = pd.date_range(start=start, freq="1D", periods=len(target)) - ts = pd.Series(index=index, data=target) - - ev = Evaluator(quantiles=("0.1", "0.2", "0.5")) - - fcst = [ - QuantileForecast( - start_date=pd.Timestamp("2012-01-11"), - freq="D", - forecast_arrays=np.array([[2.4, 9.0, 3.0, 2.4, 5.5, 4.9] * 10]), - forecast_keys=["0.5"], - ) - ] - - 
agg_metric, _ = ev(iter([ts]), iter(fcst)) - - assert np.isfinite(agg_metric["wQuantileLoss[0.5]"]) - - -@pytest.mark.parametrize( - "freq, expected_seasonality", - [ - ("1H", 24), - ("H", 24), - ("2H", 12), - ("3H", 8), - ("4H", 6), - ("15H", 1), - ("5B", 1), - ("1B", 5), - ("2W", 1), - ("3M", 4), - ("1D", 1), - ("7D", 1), - ("8D", 1), - ], -) -def test_get_seasonality(freq, expected_seasonality): - assert get_seasonality(freq) == expected_seasonality diff --git a/test/feature/test_lag.py b/test/feature/test_lag.py deleted file mode 100644 index a56df2a..0000000 --- a/test/feature/test_lag.py +++ /dev/null @@ -1,311 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - - -from pts.feature import get_lags_for_frequency - -# These are the expected lags for common frequencies and corner cases. -# By default all frequencies have the following lags: [1, 2, 3, 4, 5, 6, 7]. -# Remaining lags correspond to the same `season` (+/- `delta`) in previous `k` cycles. 
-expected_lags = { - # (apart from the default lags) centered around each of the last 3 hours (delta = 2) - "min": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 58, - 59, - 60, - 61, - 62, - 118, - 119, - 120, - 121, - 122, - 178, - 179, - 180, - 181, - 182, - ], - # centered around each of the last 3 hours (delta = 2) + last 7 days (delta = 1) - "15min": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - + [ - 95, - 96, - 97, - 191, - 192, - 193, - 287, - 288, - 289, - 383, - 384, - 385, - 479, - 480, - 481, - 575, - 576, - 577, - 671, - 672, - 673, - ], - # centered around each of the last 3 hours (delta = 2) + last 7 days (delta = 1) + 3 weeks (delta = 1) - "30min": [1, 2, 3, 4, 5, 6, 7, 8] - + [ - 47, - 48, - 49, - 95, - 96, - 97, - 143, - 144, - 145, - 191, - 192, - 193, - 239, - 240, - 241, - 287, - 288, - 289, - 335, - 336, - 337, - ] - + [671, 672, 673, 1007, 1008, 1009], - # centered around each of the last 3 hours (delta = 2) + last 7 days (delta = 1) + last 6 weeks (delta = 1) - "59min": [1, 2, 3, 4, 5, 6, 7] - + [ - 23, - 24, - 25, - 47, - 48, - 49, - 72, - 73, - 74, - 96, - 97, - 98, - 121, - 122, - 123, - 145, - 146, - 147, - 169, - 170, - 171, - ] - + [340, 341, 342, 511, 512, 513, 682, 683, 684, 731, 732, 733], - # centered around each of the last 3 hours (delta = 2) + last 7 days (delta = 1) + last 6 weeks (delta = 1) - "61min": [1, 2, 3, 4, 5, 6, 7] - + [ - 22, - 23, - 24, - 46, - 47, - 48, - 69, - 70, - 71, - 93, - 94, - 95, - 117, - 118, - 119, - 140, - 141, - 142, - 164, - 165, - 166, - ] - + [329, 330, 331, 494, 495, 496, 659, 660, 661, 707, 708, 709], - # centered around each of the last 3 hours (delta = 2) + last 7 days (delta = 1) + last 6 weeks (delta = 1) - "H": [1, 2, 3, 4, 5, 6, 7] - + [ - 23, - 24, - 25, - 47, - 48, - 49, - 71, - 72, - 73, - 95, - 96, - 97, - 119, - 120, - 121, - 143, - 144, - 145, - 167, - 168, - 169, - ] - + [335, 336, 337, 503, 504, 505, 671, 672, 673, 719, 720, 721], - # centered around each of the last 7 days (delta = 1) 
+ last 4 weeks (delta = 1) + last 1 month (delta = 1) + - # last 8th and 12th weeks (delta = 0) - "6H": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 11, - 12, - 13, - 15, - 16, - 17, - 19, - 20, - 21, - 23, - 24, - 25, - 27, - 28, - 29, - ] - + [55, 56, 57, 83, 84, 85, 111, 112, 113] - + [119, 120, 121] - + [224, 336], - # centered around each of the last 7 days (delta = 1) + last 4 weeks (delta = 1) + last 1 month (delta = 1) + - # last 8th and 12th weeks (delta = 0) + last year (delta = 1) - "12H": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - + [27, 28, 29, 41, 42, 43, 55, 56, 57] - + [59, 60, 61] - + [112, 168] - + [727, 728, 729], - # centered around each of the last 7 days (delta = 1) + last 4 weeks (delta = 1) + last 1 month (delta = 1) + - # last 8th and 12th weeks (delta = 0) + last 3 years (delta = 1) - "23H": [1, 2, 3, 4, 5, 6, 7, 8] - + [13, 14, 15, 20, 21, 22, 28, 29] - + [30, 31, 32] - + [58, 87] - + [378, 379, 380, 758, 759, 760, 1138, 1139, 1140], - # centered around each of the last 7 days (delta = 1) + last 4 weeks (delta = 1) + last 1 month (delta = 1) + - # last 8th and 12th weeks (delta = 0) + last 3 years (delta = 1) - "25H": [1, 2, 3, 4, 5, 6, 7] - + [12, 13, 14, 19, 20, 21, 25, 26, 27] - + [28, 29] - + [53, 80] - + [348, 349, 350, 697, 698, 699, 1047, 1048, 1049], - # centered around each of the last 7 days (delta = 1) + last 4 weeks (delta = 1) + last 1 month (delta = 1) + - # last 8th and 12th weeks (delta = 0) + last 3 years (delta = 1) - "D": [1, 2, 3, 4, 5, 6, 7, 8] - + [13, 14, 15, 20, 21, 22, 27, 28, 29] - + [30, 31] - + [56, 84] - + [363, 364, 365, 727, 728, 729, 1091, 1092, 1093], - # centered around each of the last 7 days (delta = 1) + last 4 weeks (delta = 1) + last 1 month (delta = 1) + - # last 8th and 12th weeks (delta = 0) + last 3 years (delta = 1) - "2D": [1, 2, 3, 4, 5] - + [6, 7, 8, 9, 10, 11, 13, 14, 15] - + [16] - + [28, 42] - + [181, 182, 183, 363, 364, 365, 545, 546, 547], - # centered around each of the 
last 3 months (delta = 0) + last 3 years (delta = 1) (assuming 52 weeks per year) - "6D": [1, 2, 3, 4, 5, 6, 7, 9, 14] + [59, 60, 61, 120, 121, 122, 181, 182, 183], - # centered around each of the last 3 months (delta = 0) + last 3 years (delta = 1) (assuming 52 weeks per year) - "W": [1, 2, 3, 4, 5, 6, 7, 8, 12] + [51, 52, 53, 103, 104, 105, 155, 156, 157], - # centered around each of the last 3 months (delta = 0) + last 3 years (delta = 1) (assuming 52 weeks per year) - "8D": [1, 2, 3, 4, 5, 6, 7, 10] + [44, 45, 46, 90, 91, 92, 135, 136, 137], - # centered around each of the last 3 years (delta = 1) - "4W": [1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 25, 26, 27, 38, 39, 40], - # centered around each of the last 3 years (delta = 1) - "3W": [1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 33, 34, 35, 51, 52, 53], - # centered around each of the last 3 years (delta = 1) - "5W": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 19, 20, 21, 30, 31, 32], - # centered around each of the last 3 years (delta = 1) - "M": [1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 23, 24, 25, 35, 36, 37], - # default - "6M": [1, 2, 3, 4, 5, 6, 7], - # default - "12M": [1, 2, 3, 4, 5, 6, 7], -} - -# For the default multiple (1) -for freq in ["min", "H", "D", "W", "M"]: - expected_lags["1" + freq] = expected_lags[freq] - -# For frequencies that do not have unique form -expected_lags["60min"] = expected_lags["1H"] -expected_lags["24H"] = expected_lags["1D"] -expected_lags["7D"] = expected_lags["1W"] - - -def test_lags(): - - freq_strs = [ - "min", - "1min", - "15min", - "30min", - "59min", - "60min", - "61min", - "H", - "1H", - "6H", - "12H", - "23H", - "24H", - "25H", - "D", - "1D", - "2D", - "6D", - "7D", - "8D", - "W", - "1W", - "3W", - "4W", - "5W", - "M", - "6M", - "12M", - ] - - for freq_str in freq_strs: - lags = get_lags_for_frequency(freq_str) - - assert ( - lags == expected_lags[freq_str] - ), "lags do not match for the frequency '{}':\nexpected: {},\nprovided: {}".format( - freq_str, expected_lags[freq_str], lags - ) diff --git 
a/test/modules/test_distribution_output.py b/test/modules/test_distribution_output.py index fea01d2..6a818b3 100644 --- a/test/modules/test_distribution_output.py +++ b/test/modules/test_distribution_output.py @@ -17,8 +17,8 @@ from torch.nn.utils import clip_grad_norm_ from torch.optim import SGD from torch.utils.data import TensorDataset, DataLoader +from gluonts.torch.modules.distribution_output import DistributionOutput from pts.modules import ( - DistributionOutput, StudentTOutput, BetaOutput, NegativeBinomialOutput, diff --git a/test/modules/test_implicit_quantile_distr_output.py b/test/modules/test_implicit_quantile_distr_output.py index c816b53..207f702 100644 --- a/test/modules/test_implicit_quantile_distr_output.py +++ b/test/modules/test_implicit_quantile_distr_output.py @@ -11,13 +11,14 @@ from torch.nn.utils import clip_grad_norm_ from torch.optim import SGD from torch.utils.data import TensorDataset, DataLoader +from gluonts.dataset.repository.datasets import get_dataset +from gluonts.evaluation import Evaluator +from gluonts.evaluation.backtest import make_evaluation_predictions +from gluonts.torch.modules.distribution_output import DistributionOutput from pts import Trainer -from pts.dataset.repository import get_dataset -from pts.evaluation import make_evaluation_predictions, Evaluator from pts.model.deepar import DeepAREstimator from pts.model.simple_feedforward import SimpleFeedForwardEstimator from pts.modules import ( - DistributionOutput, ImplicitQuantileOutput ) @@ -172,7 +173,7 @@ def test_training_with_implicit_quantile_output(): ) forecasts = list(forecast_it) tss = list(ts_it) - evaluator = Evaluator() + evaluator = Evaluator(num_workers=0) agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(dataset.test)) assert agg_metrics["MSE"] > 0 @@ -220,7 +221,7 @@ def test_instanciation_of_args_proj(): ) forecasts = list(forecast_it) tss = list(ts_it) - evaluator = Evaluator() + evaluator = Evaluator(num_workers=0) 
agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(dataset.test)) assert distr_output.method_calls == 2 diff --git a/test/test_transform.py b/test/test_transform.py deleted file mode 100644 index 697477b..0000000 --- a/test/test_transform.py +++ /dev/null @@ -1,808 +0,0 @@ -# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. - -# Standard library imports -from typing import Tuple - -# Third-party imports -import numpy as np -import pandas as pd -import pytest -import torch - -from pts import transform - -# First-party imports -from pts.dataset import ( - ProcessStartField, - FieldName, - ListDataset, - DataEntry, - calculate_dataset_statistics, - ScaleHistogram, -) -from pts.feature import time_feature - -FREQ = "1D" - -TEST_VALUES = { - "is_train": [True, False], - "target": [np.zeros(0), np.random.rand(13), np.random.rand(100)], - "start": [ - ProcessStartField.process("2012-01-02", freq="1D"), - ProcessStartField.process("1994-02-19 20:01:02", freq="3D"), - ], - "use_prediction_features": [True, False], - "allow_target_padding": [True, False], -} - - -def test_align_timestamp(): - def aligned_with(date_str, freq): - return str(ProcessStartField.process(date_str, freq=freq)) - - for _ in range(2): - assert aligned_with("2012-03-05 09:13:12", "min") == "2012-03-05 09:13:00" - assert aligned_with("2012-03-05 09:13:12", "2min") == "2012-03-05 09:12:00" - assert aligned_with("2012-03-05 09:13:12", "H") == "2012-03-05 
09:00:00" - assert aligned_with("2012-03-05 09:13:12", "D") == "2012-03-05 00:00:00" - assert aligned_with("2012-03-05 09:13:12", "W") == "2012-03-11 00:00:00" - assert aligned_with("2012-03-05 09:13:12", "4W") == "2012-03-11 00:00:00" - assert aligned_with("2012-03-05 09:13:12", "M") == "2012-03-31 00:00:00" - assert aligned_with("2012-03-05 09:13:12", "3M") == "2012-03-31 00:00:00" - assert aligned_with("2012-03-05 09:13:12", "Y") == "2012-12-31 00:00:00" - assert aligned_with("2012-03-05 09:14:11", "min") == "2012-03-05 09:14:00" - assert aligned_with("2012-03-05 09:14:11", "2min") == "2012-03-05 09:14:00" - assert aligned_with("2012-03-05 09:14:11", "H") == "2012-03-05 09:00:00" - assert aligned_with("2012-03-05 09:14:11", "D") == "2012-03-05 00:00:00" - assert aligned_with("2012-03-05 09:14:11", "W") == "2012-03-11 00:00:00" - assert aligned_with("2012-03-05 09:14:11", "4W") == "2012-03-11 00:00:00" - assert aligned_with("2012-03-05 09:14:11", "M") == "2012-03-31 00:00:00" - assert aligned_with("2012-03-05 09:14:11", "3M") == "2012-03-31 00:00:00" - - -@pytest.mark.parametrize("is_train", TEST_VALUES["is_train"]) -@pytest.mark.parametrize("target", TEST_VALUES["target"]) -@pytest.mark.parametrize("start", TEST_VALUES["start"]) -def test_AddTimeFeatures(start, target, is_train: bool): - pred_length = 13 - t = transform.AddTimeFeatures( - start_field=FieldName.START, - target_field=FieldName.TARGET, - output_field="myout", - pred_length=pred_length, - time_features=[time_feature.DayOfWeek(), time_feature.DayOfMonth()], - ) - - data = {"start": start, "target": target} - res = t.map_transform(data, is_train=is_train) - mat = res["myout"] - expected_length = len(target) + (0 if is_train else pred_length) - assert mat.shape == (2, expected_length) - tmp_idx = pd.date_range(start=start, freq=start.freq, periods=expected_length) - assert np.alltrue(mat[0] == time_feature.DayOfWeek()(tmp_idx)) - assert np.alltrue(mat[1] == time_feature.DayOfMonth()(tmp_idx)) - - 
-@pytest.mark.parametrize("is_train", TEST_VALUES["is_train"]) -@pytest.mark.parametrize("target", TEST_VALUES["target"]) -@pytest.mark.parametrize("start", TEST_VALUES["start"]) -def test_AddTimeFeatures_empty_time_features(start, target, is_train: bool): - pred_length = 13 - t = transform.AddTimeFeatures( - start_field=FieldName.START, - target_field=FieldName.TARGET, - output_field="myout", - pred_length=pred_length, - time_features=[], - ) - - data = {"start": start, "target": target} - res = t.map_transform(data, is_train=is_train) - assert res["myout"] is None - - -@pytest.mark.parametrize("is_train", TEST_VALUES["is_train"]) -@pytest.mark.parametrize("target", TEST_VALUES["target"]) -@pytest.mark.parametrize("start", TEST_VALUES["start"]) -def test_AddAgeFeatures(start, target, is_train: bool): - pred_length = 13 - t = transform.AddAgeFeature( - pred_length=pred_length, - target_field=FieldName.TARGET, - output_field="age", - log_scale=True, - ) - - data = {"start": start, "target": target} - out = t.map_transform(data, is_train=is_train) - expected_length = len(target) + (0 if is_train else pred_length) - assert out["age"].shape[-1] == expected_length - assert np.allclose( - out["age"], - np.log10(2.0 + np.arange(expected_length)).reshape((1, expected_length)), - ) - - -@pytest.mark.parametrize("pick_incomplete", TEST_VALUES["allow_target_padding"]) -@pytest.mark.parametrize("is_train", TEST_VALUES["is_train"]) -@pytest.mark.parametrize("target", TEST_VALUES["target"]) -@pytest.mark.parametrize("start", TEST_VALUES["start"]) -def test_InstanceSplitter(start, target, is_train: bool, pick_incomplete: bool): - train_length = 100 - pred_length = 13 - t = transform.InstanceSplitter( - target_field=FieldName.TARGET, - is_pad_field=FieldName.IS_PAD, - start_field=FieldName.START, - forecast_start_field=FieldName.FORECAST_START, - train_sampler=transform.UniformSplitSampler(p=1.0), - past_length=train_length, - future_length=pred_length, - 
time_series_fields=["some_time_feature"], - pick_incomplete=pick_incomplete, - ) - - other_feat = np.arange(len(target) + 100) - data = { - "start": start, - "target": target, - "some_time_feature": other_feat, - "some_other_col": "ABC", - } - - if not is_train and not pick_incomplete and len(target) < train_length: - with pytest.raises(AssertionError): - out = list(t.flatmap_transform(data, is_train=is_train)) - return - else: - out = list(t.flatmap_transform(data, is_train=is_train)) - - if is_train: - assert len(out) == max( - 0, len(target) - pred_length + 1 - (0 if pick_incomplete else train_length), - ) - else: - assert len(out) == 1 - - for o in out: - assert "target" not in o - assert "some_time_feature" not in o - assert "some_other_col" in o - - assert len(o["past_some_time_feature"]) == train_length - assert len(o["past_target"]) == train_length - - if is_train: - assert len(o["future_target"]) == pred_length - assert len(o["future_some_time_feature"]) == pred_length - else: - assert len(o["future_target"]) == 0 - assert len(o["future_some_time_feature"]) == pred_length - - # expected_length = len(target) + (0 if is_train else pred_length) - # assert len(out['age']) == expected_length - # assert np.alltrue(out['age'] == np.log10(2.0 + np.arange(expected_length))) - - -@pytest.mark.parametrize("is_train", TEST_VALUES["is_train"]) -@pytest.mark.parametrize("target", TEST_VALUES["target"]) -@pytest.mark.parametrize("start", TEST_VALUES["start"]) -@pytest.mark.parametrize( - "use_prediction_features", TEST_VALUES["use_prediction_features"] -) -@pytest.mark.parametrize("allow_target_padding", TEST_VALUES["allow_target_padding"]) -def test_CanonicalInstanceSplitter( - start, - target, - is_train: bool, - use_prediction_features: bool, - allow_target_padding: bool, -): - train_length = 100 - pred_length = 13 - t = transform.CanonicalInstanceSplitter( - target_field=FieldName.TARGET, - is_pad_field=FieldName.IS_PAD, - start_field=FieldName.START, - 
forecast_start_field=FieldName.FORECAST_START, - instance_sampler=transform.UniformSplitSampler(p=1.0), - instance_length=train_length, - prediction_length=pred_length, - time_series_fields=["some_time_feature"], - allow_target_padding=allow_target_padding, - use_prediction_features=use_prediction_features, - ) - - other_feat = np.arange(len(target) + 100) - data = { - "start": start, - "target": target, - "some_time_feature": other_feat, - "some_other_col": "ABC", - } - - out = list(t.flatmap_transform(data, is_train=is_train)) - - min_num_instances = 1 if allow_target_padding else 0 - if is_train: - assert len(out) == max(min_num_instances, len(target) - train_length + 1) - else: - assert len(out) == 1 - - for o in out: - assert "target" not in o - assert "future_target" not in o - assert "some_time_feature" not in o - assert "some_other_col" in o - - assert len(o["past_some_time_feature"]) == train_length - assert len(o["past_target"]) == train_length - - if use_prediction_features and not is_train: - assert len(o["future_some_time_feature"]) == pred_length - - -def test_Transformation(): - train_length = 100 - ds = ListDataset( - [{"start": "2012-01-01", "target": [0.2] * train_length}], freq="1D" - ) - - pred_length = 10 - - t = transform.Chain( - trans=[ - transform.AddTimeFeatures( - start_field=FieldName.START, - target_field=FieldName.TARGET, - output_field="time_feat", - time_features=[ - time_feature.DayOfWeek(), - time_feature.DayOfMonth(), - time_feature.MonthOfYear(), - ], - pred_length=pred_length, - ), - transform.AddAgeFeature( - target_field=FieldName.TARGET, - output_field="age", - pred_length=pred_length, - log_scale=True, - ), - transform.AddObservedValuesIndicator( - target_field=FieldName.TARGET, output_field="observed_values" - ), - transform.VstackFeatures( - output_field="dynamic_feat", - input_fields=["age", "time_feat"], - drop_inputs=True, - ), - transform.InstanceSplitter( - target_field=FieldName.TARGET, - 
is_pad_field=FieldName.IS_PAD, - start_field=FieldName.START, - forecast_start_field=FieldName.FORECAST_START, - train_sampler=transform.ExpectedNumInstanceSampler(num_instances=4), - past_length=train_length, - future_length=pred_length, - time_series_fields=["dynamic_feat", "observed_values"], - ), - ] - ) - - for u in t(iter(ds), is_train=True): - print(u) - - -@pytest.mark.parametrize("is_train", TEST_VALUES["is_train"]) -def test_multi_dim_transformation(is_train): - train_length = 10 - - first_dim: list = list(np.arange(1, 11, 1)) - first_dim[-1] = "NaN" - - second_dim: list = list(np.arange(11, 21, 1)) - second_dim[0] = "NaN" - - ds = ListDataset( - data_iter=[{"start": "2012-01-01", "target": [first_dim, second_dim]}], - freq="1D", - one_dim_target=False, - ) - pred_length = 2 - - # Looks weird - but this is necessary to assert the nan entries correctly. - first_dim[-1] = np.nan - second_dim[0] = np.nan - - t = transform.Chain( - trans=[ - transform.AddTimeFeatures( - start_field=FieldName.START, - target_field=FieldName.TARGET, - output_field="time_feat", - time_features=[ - time_feature.DayOfWeek(), - time_feature.DayOfMonth(), - time_feature.MonthOfYear(), - ], - pred_length=pred_length, - ), - transform.AddAgeFeature( - target_field=FieldName.TARGET, - output_field="age", - pred_length=pred_length, - log_scale=True, - ), - transform.AddObservedValuesIndicator( - target_field=FieldName.TARGET, - output_field="observed_values", - convert_nans=False, - ), - transform.VstackFeatures( - output_field="dynamic_feat", - input_fields=["age", "time_feat"], - drop_inputs=True, - ), - transform.InstanceSplitter( - target_field=FieldName.TARGET, - is_pad_field=FieldName.IS_PAD, - start_field=FieldName.START, - forecast_start_field=FieldName.FORECAST_START, - train_sampler=transform.ExpectedNumInstanceSampler(num_instances=4), - past_length=train_length, - future_length=pred_length, - time_series_fields=["dynamic_feat", "observed_values"], - time_first=False, - ), - 
] - ) - - if is_train: - for u in t(iter(ds), is_train=True): - assert_shape(u["past_target"], (2, 10)) - assert_shape(u["past_dynamic_feat"], (4, 10)) - assert_shape(u["past_observed_values"], (2, 10)) - assert_shape(u["future_target"], (2, 2)) - - assert_padded_array( - u["past_observed_values"], - np.array([[1.0] * 9 + [0.0], [0.0] + [1.0] * 9]), - u["past_is_pad"], - ) - assert_padded_array( - u["past_target"], np.array([first_dim, second_dim]), u["past_is_pad"], - ) - else: - for u in t(iter(ds), is_train=False): - assert_shape(u["past_target"], (2, 10)) - assert_shape(u["past_dynamic_feat"], (4, 10)) - assert_shape(u["past_observed_values"], (2, 10)) - assert_shape(u["future_target"], (2, 0)) - - assert_padded_array( - u["past_observed_values"], - np.array([[1.0] * 9 + [0.0], [0.0] + [1.0] * 9]), - u["past_is_pad"], - ) - assert_padded_array( - u["past_target"], np.array([first_dim, second_dim]), u["past_is_pad"], - ) - - -def test_ExpectedNumInstanceSampler(): - N = 6 - train_length = 2 - pred_length = 1 - ds = make_dataset(N, train_length) - - t = transform.Chain( - trans=[ - transform.InstanceSplitter( - target_field=FieldName.TARGET, - is_pad_field=FieldName.IS_PAD, - start_field=FieldName.START, - forecast_start_field=FieldName.FORECAST_START, - train_sampler=transform.ExpectedNumInstanceSampler(num_instances=4), - past_length=train_length, - future_length=pred_length, - pick_incomplete=True, - ) - ] - ) - - scale_hist = ScaleHistogram() - - repetition = 2 - for i in range(repetition): - for data in t(iter(ds), is_train=True): - target_values = data["past_target"] - # for simplicity, discard values that are zeros to avoid confusion with padding - target_values = target_values[target_values > 0] - scale_hist.add(target_values) - - expected_values = {i: 2 ** i * repetition for i in range(1, N)} - - assert expected_values == scale_hist.bin_counts - - -def test_BucketInstanceSampler(): - N = 6 - train_length = 2 - pred_length = 1 - ds = make_dataset(N, 
train_length) - - dataset_stats = calculate_dataset_statistics(ds) - - t = transform.Chain( - trans=[ - transform.InstanceSplitter( - target_field=FieldName.TARGET, - is_pad_field=FieldName.IS_PAD, - start_field=FieldName.START, - forecast_start_field=FieldName.FORECAST_START, - train_sampler=transform.BucketInstanceSampler( - dataset_stats.scale_histogram - ), - past_length=train_length, - future_length=pred_length, - pick_incomplete=True, - ) - ] - ) - - scale_hist = ScaleHistogram() - - repetition = 200 - for i in range(repetition): - for data in t(iter(ds), is_train=True): - target_values = data["past_target"] - # for simplicity, discard values that are zeros to avoid confusion with padding - target_values = target_values[target_values > 0] - scale_hist.add(target_values) - - expected_values = {i: repetition for i in range(1, N)} - found_values = scale_hist.bin_counts - - for i in range(1, N): - assert abs(expected_values[i] - found_values[i] < expected_values[i] * 0.3) - - -def test_cdf_to_gaussian_transformation(): - def make_test_data(): - target = np.array( - [0, 0, 0, 0, 10, 10, 20, 20, 30, 30, 40, 50, 59, 60, 60, 70, 80, 90, 100,] - ).tolist() - - np.random.shuffle(target) - - multi_dim_target = np.array([target, target]).transpose() - - past_is_pad = np.array([[0] * len(target)]).transpose() - - past_observed_target = np.array( - [[1] * len(target), [1] * len(target)] - ).transpose() - - ds = ListDataset( - # Mimic output from InstanceSplitter - data_iter=[ - { - "start": "2012-01-01", - "target": multi_dim_target, - "past_target": multi_dim_target, - "future_target": multi_dim_target, - "past_is_pad": past_is_pad, - f"past_{FieldName.OBSERVED_VALUES}": past_observed_target, - } - ], - freq="1D", - one_dim_target=False, - ) - return ds - - def make_fake_output(u: DataEntry): - fake_output = np.expand_dims( - np.expand_dims(u["past_target_cdf"], axis=0), axis=0 - ) - return fake_output - - ds = make_test_data() - - t = transform.Chain( - trans=[ - 
transform.CDFtoGaussianTransform( - target_field=FieldName.TARGET, - observed_values_field=FieldName.OBSERVED_VALUES, - max_context_length=20, - target_dim=2, - ) - ] - ) - - for u in t(iter(ds), is_train=False): - - fake_output = make_fake_output(u) - - # Fake transformation chain output - u["past_target_sorted"] = torch.tensor( - np.expand_dims(u["past_target_sorted"], axis=0) - ) - - u["slopes"] = torch.tensor(np.expand_dims(u["slopes"], axis=0)) - - u["intercepts"] = torch.tensor(np.expand_dims(u["intercepts"], axis=0)) - - back_transformed = transform.cdf_to_gaussian_forward_transform(u, fake_output) - - # Get any sample/batch (slopes[i][:, d]they are all the same) - back_transformed = back_transformed[0][0] - - original_target = u["target"] - - # Original target and back-transformed target should be the same - assert np.allclose(original_target, back_transformed) - - -def test_gaussian_cdf(): - try: - from scipy.stats import norm - except: - pytest.skip("scipy not installed skipping test for erf") - - x = np.array( - [-1000, -100, -10] + np.linspace(-2, 2, 1001).tolist() + [10, 100, 1000] - ) - y_gluonts = transform.CDFtoGaussianTransform.standard_gaussian_cdf(x) - y_scipy = norm.cdf(x) - - assert np.allclose(y_gluonts, y_scipy, atol=1e-7) - - -def test_gaussian_ppf(): - try: - from scipy.stats import norm - except: - pytest.skip("scipy not installed skipping test for erf") - - x = np.linspace(0.0001, 0.9999, 1001) - y_gluonts = transform.CDFtoGaussianTransform.standard_gaussian_ppf(x) - y_scipy = norm.ppf(x) - - assert np.allclose(y_gluonts, y_scipy, atol=1e-7) - - -def test_target_dim_indicator(): - target = np.array([0, 2, 3, 10]).tolist() - - multi_dim_target = np.array([target, target, target, target]) - dataset = ListDataset( - data_iter=[{"start": "2012-01-01", "target": multi_dim_target}], - freq="1D", - one_dim_target=False, - ) - - t = transform.Chain( - trans=[ - transform.TargetDimIndicator( - target_field=FieldName.TARGET, 
field_name="target_dimensions" - ) - ] - ) - - for data_entry in t(dataset, is_train=True): - assert (data_entry["target_dimensions"] == np.array([0, 1, 2, 3])).all() - - -@pytest.fixture -def point_process_dataset(): - - ia_times = np.array([0.2, 0.7, 0.2, 0.5, 0.3, 0.3, 0.2, 0.1]) - marks = np.array([0, 1, 2, 0, 1, 2, 2, 2]) - - lds = ListDataset( - [ - { - "target": np.c_[ia_times, marks].T, - "start": pd.Timestamp("2011-01-01 00:00:00", freq="H"), - "end": pd.Timestamp("2011-01-01 03:00:00", freq="H"), - } - ], - freq="H", - one_dim_target=False, - ) - - return lds - - -class MockContinuousTimeSampler(transform.ContinuousTimePointSampler): - # noinspection PyMissingConstructor,PyUnusedLocal - def __init__(self, ret_values, *args, **kwargs): - self._ret_values = ret_values - - def __call__(self, *args, **kwargs): - return np.array(self._ret_values) - - -def test_ctsplitter_mask_sorted(point_process_dataset): - d = next(iter(point_process_dataset)) - - ia_times = d["target"][0, :] - - ts = np.cumsum(ia_times) - - splitter = transform.ContinuousTimeInstanceSplitter( - 2, 1, train_sampler=transform.ContinuousTimeUniformSampler(num_instances=10), - ) - - # no boundary conditions - res = splitter._mask_sorted(ts, 1, 2) - assert all([a == b for a, b in zip([2, 3, 4], res)]) - - # lower bound equal, exclusive of upper bound - res = splitter._mask_sorted(np.array([1, 2, 3, 4, 5, 6]), 1, 2) - assert all([a == b for a, b in zip([0], res)]) - - -def test_ctsplitter_no_train_last_point(point_process_dataset): - splitter = transform.ContinuousTimeInstanceSplitter( - 2, 1, train_sampler=transform.ContinuousTimeUniformSampler(num_instances=10), - ) - - iter_de = splitter(point_process_dataset, is_train=False) - - d_out = next(iter(iter_de)) - - assert "future_target" not in d_out - assert "future_valid_length" not in d_out - assert "past_target" in d_out - assert "past_valid_length" in d_out - - assert d_out["past_valid_length"] == 6 - assert np.allclose( - [0.1, 0.5, 0.3, 
0.3, 0.2, 0.1], d_out["past_target"][..., 0], atol=0.01 - ) - - -def test_ctsplitter_train_correct(point_process_dataset): - splitter = transform.ContinuousTimeInstanceSplitter( - 1, - 1, - train_sampler=MockContinuousTimeSampler( - ret_values=[1.01, 1.5, 1.99], num_instances=3 - ), - ) - - iter_de = splitter(point_process_dataset, is_train=True) - - outputs = list(iter_de) - - assert outputs[0]["past_valid_length"] == 2 - assert outputs[0]["future_valid_length"] == 3 - - assert np.allclose(outputs[0]["past_target"], np.array([[0.19, 0.7], [0, 1]]).T) - assert np.allclose( - outputs[0]["future_target"], np.array([[0.09, 0.5, 0.3], [2, 0, 1]]).T - ) - - assert outputs[1]["past_valid_length"] == 2 - assert outputs[1]["future_valid_length"] == 4 - - assert outputs[2]["past_valid_length"] == 3 - assert outputs[2]["future_valid_length"] == 3 - - -def test_ctsplitter_train_correct_out_count(point_process_dataset): - - # produce new TPP data by shuffling existing TS instance - def shuffle_iterator(num_duplications=5): - for entry in point_process_dataset: - for i in range(num_duplications): - d = dict.copy(entry) - d["target"] = np.random.permutation(d["target"].T).T - yield d - - splitter = transform.ContinuousTimeInstanceSplitter( - 1, - 1, - train_sampler=MockContinuousTimeSampler( - ret_values=[1.01, 1.5, 1.99], num_instances=3 - ), - ) - - iter_de = splitter(shuffle_iterator(), is_train=True) - - outputs = list(iter_de) - - assert len(outputs) == 5 * 3 - - -def test_ctsplitter_train_samples_correct_times(point_process_dataset): - - splitter = transform.ContinuousTimeInstanceSplitter( - 1.25, 1.25, train_sampler=transform.ContinuousTimeUniformSampler(20) - ) - - iter_de = splitter(point_process_dataset, is_train=True) - - assert all( - [ - ( - pd.Timestamp("2011-01-01 01:15:00") - <= d["forecast_start"] - <= pd.Timestamp("2011-01-01 01:45:00") - ) - for d in iter_de - ] - ) - - -def test_ctsplitter_train_short_intervals(point_process_dataset): - splitter = 
transform.ContinuousTimeInstanceSplitter( - 0.01, - 0.01, - train_sampler=MockContinuousTimeSampler( - ret_values=[1.01, 1.5, 1.99], num_instances=3 - ), - ) - - iter_de = splitter(point_process_dataset, is_train=True) - - for d in iter_de: - assert d["future_valid_length"] == d["past_valid_length"] == 0 - assert np.prod(np.shape(d["past_target"])) == 0 - assert np.prod(np.shape(d["future_target"])) == 0 - - -def make_dataset(N, train_length): - # generates 2 ** N - 1 timeseries with constant increasing values - n = 2 ** N - 1 - targets = np.ones((n, train_length)) - for i in range(0, n): - targets[i, :] = targets[i, :] * i - - ds = ListDataset( - data_iter=[{"start": "2012-01-01", "target": targets[i, :]} for i in range(n)], - freq="1D", - ) - - return ds - - -def assert_shape(array: np.array, reference_shape: Tuple[int, int]): - assert ( - array.shape == reference_shape - ), f"Shape should be {reference_shape} but found {array.shape}." - - -def assert_padded_array( - sampled_array: np.array, reference_array: np.array, padding_array: np.array -): - num_padded = int(np.sum(padding_array)) - sampled_no_padding = sampled_array[:, num_padded:] - - reference_array = np.roll(reference_array, num_padded, axis=1) - reference_no_padding = reference_array[:, num_padded:] - - # Convert nans to dummy value for assertion because - # np.nan == np.nan -> False. - reference_no_padding[np.isnan(reference_no_padding)] = 9999.0 - sampled_no_padding[np.isnan(sampled_no_padding)] = 9999.0 - - reference_no_padding = np.array(reference_no_padding, dtype=np.float32) - - assert (sampled_no_padding == reference_no_padding).all(), ( - f"Sampled and reference arrays do not match. '" - f"Got {sampled_no_padding} but should be {reference_no_padding}." - )