diff --git a/examples/Time-Grad-Electricity.ipynb b/examples/Time-Grad-Electricity.ipynb
index fd2bf01..556895f 100644
--- a/examples/Time-Grad-Electricity.ipynb
+++ b/examples/Time-Grad-Electricity.ipynb
@@ -5,8 +5,24 @@
    "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:08:26.228737Z",
-     "start_time": "2022-12-23T07:08:24.806396Z"
+     "end_time": "2022-12-23T09:57:39.738715Z",
+     "start_time": "2022-12-23T09:57:39.730077Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# autoreload import your package\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-12-23T09:57:40.738250Z",
+     "start_time": "2022-12-23T09:57:39.739677Z"
     }
    },
    "outputs": [],
@@ -22,23 +38,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:08:26.417012Z",
-     "start_time": "2022-12-23T07:08:26.230461Z"
+     "end_time": "2022-12-23T09:57:40.909476Z",
+     "start_time": "2022-12-23T09:57:40.739741Z"
     }
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/wassname/miniforge3/envs/gluonts10.0/lib/python3.9/site-packages/gluonts/json.py:101: UserWarning: Using `json`-module for json-handling. Consider installing one of `orjson`, `ujson` to speed up serialization and deserialization.\n",
-      "  warnings.warn(\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from gluonts.dataset.multivariate_grouper import MultivariateGrouper\n",
     "from gluonts.dataset.repository.datasets import dataset_recipes, get_dataset\n",
@@ -48,11 +55,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:08:26.469299Z",
-     "start_time": "2022-12-23T07:08:26.419051Z"
+     "end_time": "2022-12-23T09:57:40.966206Z",
+     "start_time": "2022-12-23T09:57:40.910477Z"
     }
    },
    "outputs": [],
@@ -65,11 +72,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:08:26.477478Z",
-     "start_time": "2022-12-23T07:08:26.470628Z"
+     "end_time": "2022-12-23T09:57:40.980883Z",
+     "start_time": "2022-12-23T09:57:40.967157Z"
     }
    },
    "outputs": [],
@@ -79,11 +86,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:08:26.494456Z",
-     "start_time": "2022-12-23T07:08:26.478496Z"
+     "end_time": "2022-12-23T09:57:41.003468Z",
+     "start_time": "2022-12-23T09:57:40.981830Z"
     }
    },
    "outputs": [],
@@ -147,11 +154,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:08:26.516023Z",
-     "start_time": "2022-12-23T07:08:26.495676Z"
+     "end_time": "2022-12-23T09:57:41.025224Z",
+     "start_time": "2022-12-23T09:57:41.004330Z"
     }
    },
    "outputs": [
@@ -169,11 +176,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:08:26.532361Z",
-     "start_time": "2022-12-23T07:08:26.517210Z"
+     "end_time": "2022-12-23T09:57:41.041326Z",
+     "start_time": "2022-12-23T09:57:41.026069Z"
     },
     "scrolled": true
    },
@@ -185,11 +192,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:08:26.554669Z",
-     "start_time": "2022-12-23T07:08:26.534354Z"
+     "end_time": "2022-12-23T09:57:41.057694Z",
+     "start_time": "2022-12-23T09:57:41.042892Z"
     }
    },
    "outputs": [
@@ -199,7 +206,7 @@
        "MetaData(freq='H', target=None, feat_static_cat=[CategoricalFeatureInfo(name='feat_static_cat_0', cardinality='370')], feat_static_real=[], feat_dynamic_real=[], feat_dynamic_cat=[], prediction_length=24)"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -210,67 +217,51 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2022-12-23T07:12:03.693420Z",
      "start_time": "2022-12-23T07:12:02.845649Z"
     }
    },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "7"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": []
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 10,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:12:11.716051Z",
-     "start_time": "2022-12-23T07:12:11.712672Z"
+     "end_time": "2022-12-23T09:57:41.073075Z",
+     "start_time": "2022-12-23T09:57:41.058526Z"
     }
    },
    "outputs": [],
    "source": [
-    "train_grouper = MultivariateGrouper(max_target_dim=min(2000, int(dataset.metadata.feat_static_cat[0].cardinality)))\n",
-    "\n",
-    "test_grouper = MultivariateGrouper(\n",
-    "#     num_test_dates=int(len(dataset.test)/len(dataset.train)*2),\n",
-    "    num_test_dates=7,\n",
-    "                                   max_target_dim=min(2000, int(dataset.metadata.feat_static_cat[0].cardinality)))"
+    "train_grouper = MultivariateGrouper(max_target_dim=min(2000, int(dataset.metadata.feat_static_cat[0].cardinality)))\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {
     "ExecuteTime": {
-     "start_time": "2022-12-23T07:12:12.382Z"
+     "end_time": "2022-12-23T09:57:44.134871Z",
+     "start_time": "2022-12-23T09:57:41.073880Z"
     }
    },
    "outputs": [],
    "source": [
-    "dataset_train = train_grouper(dataset.train)\n",
-    "dataset_test = test_grouper(dataset.test)"
+    "dataset_train = train_grouper(dataset.train)\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:09:49.342107Z",
-     "start_time": "2022-12-23T07:09:49.342097Z"
+     "end_time": "2022-12-23T09:57:44.153700Z",
+     "start_time": "2022-12-23T09:57:44.136099Z"
     }
    },
    "outputs": [],
@@ -300,22 +291,265 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:09:49.342839Z",
-     "start_time": "2022-12-23T07:09:49.342830Z"
+     "start_time": "2022-12-23T09:57:39.746Z"
     }
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "744b148440674ef3902ca56622cb001b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "88abd67eb98d4569b276f98dadd73205",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f5e08c9306c647cd8589f29d8652177b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "10a4ac2006db4f6090dc3f08cc3e3e9e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6458a98361854786bc4fa81da37471d2",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "676d661e21ec42acbb544a8848264674",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "36010cebcd884d248de158890d2ba324",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f7487fae1ae54dae8c71d23ea5721e7f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6a995bf552bb487cbca2a0f3e45098b3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3ee8b7cdd0b94465a78c43d0917910ae",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "958283558c6a4e958c5c67188e735ed9",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "predictor = estimator.train(dataset_train, num_workers=0)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:09:49.343440Z",
-     "start_time": "2022-12-23T07:09:49.343431Z"
+     "start_time": "2022-12-23T09:57:39.748Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "test_grouper = MultivariateGrouper(\n",
+    "    num_test_dates=int(len(dataset.test)/len(dataset.train)*2),\n",
+    "#     num_test_dates=7,\n",
+    "                                   max_target_dim=min(2000, int(dataset.metadata.feat_static_cat[0].cardinality)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2022-12-23T09:57:39.749Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "dataset_test = test_grouper(dataset.test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2022-12-23T09:57:39.750Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "next(iter(dataset.test.iter_sequential()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2022-12-23T09:57:39.751Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "len(dataset.test)\n",
+    "x = [x['target'].shape for x in dataset.test.iter_sequential()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2022-12-23T09:57:39.752Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pd.Series(x).value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2022-12-23T09:57:39.753Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%debug"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2022-12-23T09:57:39.754Z"
     }
    },
    "outputs": [],
@@ -330,8 +564,7 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:09:49.344138Z",
-     "start_time": "2022-12-23T07:09:49.344129Z"
+     "start_time": "2022-12-23T09:57:39.756Z"
     }
    },
    "outputs": [],
@@ -345,8 +578,7 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:09:49.344864Z",
-     "start_time": "2022-12-23T07:09:49.344855Z"
+     "start_time": "2022-12-23T09:57:39.756Z"
     }
    },
    "outputs": [],
@@ -364,8 +596,7 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:08:38.402632Z",
-     "start_time": "2022-12-23T07:08:38.402624Z"
+     "start_time": "2022-12-23T09:57:39.757Z"
     }
    },
    "outputs": [],
@@ -379,8 +610,7 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:08:38.403367Z",
-     "start_time": "2022-12-23T07:08:38.403359Z"
+     "start_time": "2022-12-23T09:57:39.758Z"
     },
     "scrolled": true
    },
@@ -394,8 +624,7 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:08:38.404060Z",
-     "start_time": "2022-12-23T07:08:38.404051Z"
+     "start_time": "2022-12-23T09:57:39.759Z"
     }
    },
    "outputs": [],
@@ -419,9 +648,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "gluonts10.0",
+   "display_name": "glounts",
    "language": "python",
-   "name": "gluonts10.0"
+   "name": "glounts"
   },
   "language_info": {
    "codemirror_mode": {
diff --git a/examples/Time-Grad2-Electricity.ipynb b/examples/Time-Grad2-Electricity.ipynb
index 9d698bb..59b353e 100644
--- a/examples/Time-Grad2-Electricity.ipynb
+++ b/examples/Time-Grad2-Electricity.ipynb
@@ -5,8 +5,8 @@
    "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:03.396739Z",
-     "start_time": "2022-12-23T07:10:03.388069Z"
+     "end_time": "2022-12-23T09:43:54.678816Z",
+     "start_time": "2022-12-23T09:43:54.672175Z"
     }
    },
    "outputs": [],
@@ -24,8 +24,8 @@
    "execution_count": 2,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:04.623030Z",
-     "start_time": "2022-12-23T07:10:03.397747Z"
+     "end_time": "2022-12-23T09:43:55.584475Z",
+     "start_time": "2022-12-23T09:43:54.679524Z"
     }
    },
    "outputs": [],
@@ -44,8 +44,8 @@
    "execution_count": 3,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:04.837709Z",
-     "start_time": "2022-12-23T07:10:04.624882Z"
+     "end_time": "2022-12-23T09:43:55.755092Z",
+     "start_time": "2022-12-23T09:43:55.585959Z"
     }
    },
    "outputs": [],
@@ -61,8 +61,8 @@
    "execution_count": 4,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:04.943831Z",
-     "start_time": "2022-12-23T07:10:04.839132Z"
+     "end_time": "2022-12-23T09:43:55.836294Z",
+     "start_time": "2022-12-23T09:43:55.756050Z"
     }
    },
    "outputs": [],
@@ -78,8 +78,8 @@
    "execution_count": 5,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:04.998731Z",
-     "start_time": "2022-12-23T07:10:04.945411Z"
+     "end_time": "2022-12-23T09:43:55.875122Z",
+     "start_time": "2022-12-23T09:43:55.837634Z"
     }
    },
    "outputs": [],
@@ -87,13 +87,429 @@
     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 6,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:05.023583Z",
-     "start_time": "2022-12-23T07:10:05.000003Z"
+     "end_time": "2022-12-23T09:43:55.890801Z",
+     "start_time": "2022-12-23T09:43:55.875991Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Available datasets: ['constant', 'exchange_rate', 'solar-energy', 'electricity', 'traffic', 'exchange_rate_nips', 'electricity_nips', 'traffic_nips', 'solar_nips', 'wiki-rolling_nips', 'taxi_30min', 'kaggle_web_traffic_with_missing', 'kaggle_web_traffic_without_missing', 'kaggle_web_traffic_weekly', 'm1_yearly', 'm1_quarterly', 'm1_monthly', 'nn5_daily_with_missing', 'nn5_daily_without_missing', 'nn5_weekly', 'tourism_monthly', 'tourism_quarterly', 'tourism_yearly', 'cif_2016', 'london_smart_meters_without_missing', 'wind_farms_without_missing', 'car_parts_without_missing', 'dominick', 'fred_md', 'pedestrian_counts', 'hospital', 'covid_deaths', 'kdd_cup_2018_without_missing', 'weather', 'm3_monthly', 'm3_quarterly', 'm3_yearly', 'm3_other', 'm4_hourly', 'm4_daily', 'm4_weekly', 'm4_monthly', 'm4_quarterly', 'm4_yearly', 'm5', 'uber_tlc_daily', 'uber_tlc_hourly', 'airpassengers']\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f\"Available datasets: {list(dataset_recipes.keys())}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-12-23T09:43:55.906924Z",
+     "start_time": "2022-12-23T09:43:55.891681Z"
+    },
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# exchange_rate_nips, electricity_nips, traffic_nips, solar_nips, wiki-rolling_nips, ## taxi_30min is buggy still\n",
+    "dataset = get_dataset(\"electricity_nips\", regenerate=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-12-23T09:43:55.923232Z",
+     "start_time": "2022-12-23T09:43:55.907597Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "MetaData(freq='H', target=None, feat_static_cat=[CategoricalFeatureInfo(name='feat_static_cat_0', cardinality='370')], feat_static_real=[], feat_dynamic_real=[], feat_dynamic_cat=[], prediction_length=24)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dataset.metadata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-12-23T09:43:56.450920Z",
+     "start_time": "2022-12-23T09:43:55.924531Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "train_grouper = MultivariateGrouper(max_target_dim=min(2000, int(dataset.metadata.feat_static_cat[0].cardinality)))\n",
+    "\n",
+    "test_grouper = MultivariateGrouper(\n",
+    "    num_test_dates=int(len(dataset.test)/len(dataset.train)*2),\n",
+    "#     num_test_dates=7,\n",
+    "                                   max_target_dim=min(2000, int(dataset.metadata.feat_static_cat[0].cardinality)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-12-23T09:43:59.956426Z",
+     "start_time": "2022-12-23T09:43:56.451881Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/wassname/miniforge3/envs/glounts/lib/python3.9/site-packages/gluonts/dataset/multivariate_grouper.py:191: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
+      "  return {FieldName.TARGET: np.array([funcs(data) for data in dataset])}\n"
+     ]
+    }
+   ],
+   "source": [
+    "dataset_train = train_grouper(dataset.train)\n",
+    "dataset_test = test_grouper(dataset.test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-12-23T09:43:59.970082Z",
+     "start_time": "2022-12-23T09:43:59.957922Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "estimator = TimeGradEstimator2(\n",
+    "    target_dim=int(dataset.metadata.feat_static_cat[0].cardinality),\n",
+    "    prediction_length=dataset.metadata.prediction_length,\n",
+    "    context_length=dataset.metadata.prediction_length,\n",
+    "    cell_type='GRU',\n",
+    "    input_size=1484,\n",
+    "    freq=dataset.metadata.freq,\n",
+    "    loss_type='l2',\n",
+    "    scaling=True,\n",
+    "    diff_steps=100,\n",
+    "    beta_end=0.1,\n",
+    "    beta_schedule=\"linear\",\n",
+    "    trainer=Trainer(device=device,\n",
+    "                    epochs=20,\n",
+    "                    learning_rate=1e-3,\n",
+    "                    num_batches_per_epoch=100,\n",
+    "                    batch_size=64,)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2022-12-23T09:43:54.691Z"
+    },
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a16f9f9d61ef42e7beca0a83aad84d5f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7a7aed6358a3473c829a6736e3cb4804",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "aabe23ac059144e489ffc1bcdf0842b3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e0285fd6fedd41228b9c10463e635c6a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "001057df67ef453784dd5fb487ef8c79",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "5b4e3798f19f4969a6bcddeb200ce2df",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c5c8a4aa42f6469b9c6224f4ab91cd87",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "87627507bf9a4c91a5ca459ebb6838e5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "011af0b6d19d47369b6ba49c89f176de",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f9036005523640e88907dec1e5f6ea9e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "8107b099dd1347c18f47e17193d810dc",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b10c71c42fa143488495f7d0f4c10372",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0004e61d284b4d248212d8142f0ba939",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/99 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# predictor = estimator.train(dataset_train, num_workers=0)\n",
+    "predictor = estimator.train(dataset_train, num_workers=8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2022-12-23T09:43:54.693Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# %debug"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-12-23T07:26:02.763990Z",
+     "start_time": "2022-12-23T07:26:02.737015Z"
+    }
+   },
+   "source": [
+    "## Forecast"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2022-12-23T09:43:54.694Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "forecast_it, ts_it = make_evaluation_predictions(dataset=dataset_test,\n",
+    "                                                 predictor=predictor,\n",
+    "                                                 num_samples=100)\n",
+    "\n",
+    "forecasts = list(forecast_it)\n",
+    "targets = list(ts_it)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2022-12-23T09:43:54.696Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# %debug"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-12-23T08:13:45.685401Z",
+     "start_time": "2022-12-23T08:11:01.825222Z"
+    }
+   },
+   "source": [
+    "## Plot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "ExecuteTime": {
+     "start_time": "2022-12-23T09:43:54.697Z"
     }
    },
    "outputs": [],
@@ -152,292 +568,8 @@
     "    axx[0].legend(legend, loc=\"upper left\")\n",
     "    \n",
     "    if fname is not None:\n",
-    "        plt.savefig(fname, bbox_inches='tight', pad_inches=0.05)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:05.045150Z",
-     "start_time": "2022-12-23T07:10:05.024529Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Available datasets: ['constant', 'exchange_rate', 'solar-energy', 'electricity', 'traffic', 'exchange_rate_nips', 'electricity_nips', 'traffic_nips', 'solar_nips', 'wiki-rolling_nips', 'taxi_30min', 'kaggle_web_traffic_with_missing', 'kaggle_web_traffic_without_missing', 'kaggle_web_traffic_weekly', 'm1_yearly', 'm1_quarterly', 'm1_monthly', 'nn5_daily_with_missing', 'nn5_daily_without_missing', 'nn5_weekly', 'tourism_monthly', 'tourism_quarterly', 'tourism_yearly', 'cif_2016', 'london_smart_meters_without_missing', 'wind_farms_without_missing', 'car_parts_without_missing', 'dominick', 'fred_md', 'pedestrian_counts', 'hospital', 'covid_deaths', 'kdd_cup_2018_without_missing', 'weather', 'm3_monthly', 'm3_quarterly', 'm3_yearly', 'm3_other', 'm4_hourly', 'm4_daily', 'm4_weekly', 'm4_monthly', 'm4_quarterly', 'm4_yearly', 'm5', 'uber_tlc_daily', 'uber_tlc_hourly', 'airpassengers']\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f\"Available datasets: {list(dataset_recipes.keys())}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:05.061294Z",
-     "start_time": "2022-12-23T07:10:05.046087Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "# exchange_rate_nips, electricity_nips, traffic_nips, solar_nips, wiki-rolling_nips, ## taxi_30min is buggy still\n",
-    "dataset = get_dataset(\"electricity_nips\", regenerate=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:05.077563Z",
-     "start_time": "2022-12-23T07:10:05.063312Z"
-    }
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "MetaData(freq='H', target=None, feat_static_cat=[CategoricalFeatureInfo(name='feat_static_cat_0', cardinality='370')], feat_static_real=[], feat_dynamic_real=[], feat_dynamic_cat=[], prediction_length=24)"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset.metadata"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:05.855331Z",
-     "start_time": "2022-12-23T07:10:05.078457Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "train_grouper = MultivariateGrouper(max_target_dim=min(2000, int(dataset.metadata.feat_static_cat[0].cardinality)))\n",
-    "\n",
-    "test_grouper = MultivariateGrouper(\n",
-    "    num_test_dates=int(len(dataset.test)/len(dataset.train)*2),\n",
-    "#     num_test_dates=7,\n",
-    "                                   max_target_dim=min(2000, int(dataset.metadata.feat_static_cat[0].cardinality)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:10.695915Z",
-     "start_time": "2022-12-23T07:10:05.856513Z"
-    }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/wassname/miniforge3/envs/glounts/lib/python3.9/site-packages/gluonts/dataset/multivariate_grouper.py:191: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
-      "  return {FieldName.TARGET: np.array([funcs(data) for data in dataset])}\n"
-     ]
-    }
-   ],
-   "source": [
-    "dataset_train = train_grouper(dataset.train)\n",
-    "dataset_test = test_grouper(dataset.test)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2022-12-23T07:10:10.751447Z",
-     "start_time": "2022-12-23T07:10:10.736880Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "estimator = TimeGradEstimator2(\n",
-    "    target_dim=int(dataset.metadata.feat_static_cat[0].cardinality),\n",
-    "    prediction_length=dataset.metadata.prediction_length,\n",
-    "    context_length=dataset.metadata.prediction_length,\n",
-    "    cell_type='GRU',\n",
-    "    input_size=1484,\n",
-    "    freq=dataset.metadata.freq,\n",
-    "    loss_type='l2',\n",
-    "    scaling=False,\n",
-    "    diff_steps=100,\n",
-    "    beta_end=0.1,\n",
-    "    beta_schedule=\"linear\",\n",
-    "    trainer=Trainer(device=device,\n",
-    "                    epochs=20,\n",
-    "                    learning_rate=1e-3,\n",
-    "                    num_batches_per_epoch=100,\n",
-    "                    batch_size=64,)\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.400Z"
-    },
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6938987ed5684176bbd50e619864fa5b",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/99 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/media/wassname/SGIronWolf/projects5/timeseries/pytorch-ts/pts/model/time_grad2/gaussian_diffusion_ou.py:283: UserWarning: Using a target size (torch.Size([64, 1, 48])) that is different to the input size (torch.Size([64, 8, 48])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
-      "  loss = F.mse_loss(x_recon, noise_rand)\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "7624b47aeb164cc2bc96f15aad09d5e3",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/99 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "25eccd2adff24ab2affa552c91f826c2",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/99 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "4ac98587176f4842bc63367d4a5085c2",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/99 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "edecf5085cef40a1ac2cd43abd1c7e4e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/99 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "predictor = estimator.train(dataset_train, num_workers=0)\n",
-    "# predictor = estimator.train(dataset_train, num_workers=8)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.401Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# %debug"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.402Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "forecast_it, ts_it = make_evaluation_predictions(dataset=dataset_test,\n",
-    "                                                 predictor=predictor,\n",
-    "                                                 num_samples=100)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.403Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "forecasts = list(forecast_it)\n",
-    "targets = list(ts_it)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.404Z"
-    }
-   },
-   "outputs": [],
-   "source": [
+    "        plt.savefig(fname, bbox_inches='tight', pad_inches=0.05)\n",
+    "        \n",
     "plot(\n",
     "    target=targets[0],\n",
     "    forecast=forecasts[0],\n",
@@ -451,7 +583,7 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.405Z"
+     "start_time": "2022-12-23T09:43:54.699Z"
     }
    },
    "outputs": [],
@@ -465,7 +597,7 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.406Z"
+     "start_time": "2022-12-23T09:43:54.700Z"
     },
     "scrolled": true
    },
@@ -479,7 +611,7 @@
    "execution_count": null,
    "metadata": {
     "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.407Z"
+     "start_time": "2022-12-23T09:43:54.701Z"
     }
    },
    "outputs": [],
@@ -500,282 +632,6 @@
    "outputs": [],
    "source": []
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Scratch: debug transforms\n",
-    "\n",
-    "See estimator"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.411Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "predictor = estimator.train(dataset_train, num_workers=0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.412Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "%debug"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.414Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from tqdm.auto import tqdm"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.415Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "transformation = estimator.create_transformation()\n",
-    "training_instance_splitter = estimator.create_instance_splitter(\"training\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.416Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "g1 = trans.apply(dataset_train, is_train=True)\n",
-    "b = next(iter(g1))\n",
-    "b.keys()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.417Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "for _ in tqdm(g1):\n",
-    "    pass"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.418Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "transform = transformation + training_instance_splitter\n",
-    "g2 = transform.apply(dataset_train, is_train=True)\n",
-    "gg = iter(g2)\n",
-    "b = next(gg)\n",
-    "b.keys()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.419Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "for _ in tqdm(g2):\n",
-    "    pass"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.420Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "b = next(gg)\n",
-    "b.keys()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.422Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from pts.model import get_module_forward_input_names\n",
-    "from gluonts.transform import SelectFields, Transformation\n",
-    "\n",
-    "trained_net = estimator.create_training_network(estimator.trainer.device)\n",
-    "input_names = get_module_forward_input_names(trained_net)\n",
-    "transform = transformation + training_instance_splitter + SelectFields(input_names)\n",
-    "g = transform.apply(dataset_train, is_train=True)\n",
-    "b = next(iter(g))\n",
-    "b.keys()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2022-12-23T05:18:19.966830Z",
-     "start_time": "2022-12-23T05:18:19.964901Z"
-    }
-   },
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.424Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from pts.dataset.loader import TransformedIterableDataset\n",
-    "training_data = dataset_train\n",
-    "training_iter_dataset = TransformedIterableDataset(\n",
-    "    dataset=training_data,\n",
-    "    transform=transformation\n",
-    "    + training_instance_splitter\n",
-    "    + SelectFields(input_names),\n",
-    "    is_train=True,\n",
-    "    shuffle_buffer_length=None,\n",
-    "#     cache_data=cache_data,\n",
-    ")\n",
-    "training_iter_dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.425Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "next(iter(training_iter_dataset))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.426Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from torch.utils.data import DataLoader\n",
-    "training_data_loader = DataLoader(\n",
-    "    training_iter_dataset,\n",
-    "    batch_size=estimator.trainer.batch_size,\n",
-    "    num_workers=0,\n",
-    "#     prefetch_factor=prefetch_factor,\n",
-    "    pin_memory=True,\n",
-    "    worker_init_fn=estimator._worker_init_fn,\n",
-    "#         **kwargs,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.428Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "next(iter(training_data_loader))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "start_time": "2022-12-23T07:10:03.429Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "for b in tqdm(training_data_loader):\n",
-    "    pass"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/pts/model/time_grad/time_grad_estimator.py b/pts/model/time_grad/time_grad_estimator.py
index 955d3e6..fdbdfbf 100644
--- a/pts/model/time_grad/time_grad_estimator.py
+++ b/pts/model/time_grad/time_grad_estimator.py
@@ -251,7 +251,7 @@ class TimeGradEstimator(PyTorchEstimator):
             input_names=input_names,
             prediction_net=prediction_network,
             batch_size=self.trainer.batch_size,
-            freq=self.freq,
+            # freq=self.freq,
             prediction_length=self.prediction_length,
             device=device,
         )
diff --git a/pts/model/time_grad2/epsilon_theta.py b/pts/model/time_grad2/epsilon_theta.py
index 3178087..7bb1e28 100644
--- a/pts/model/time_grad2/epsilon_theta.py
+++ b/pts/model/time_grad2/epsilon_theta.py
@@ -124,7 +124,7 @@ class EpsilonTheta2(nn.Module):
         # nn.init.zeros_(self.output_projection.weight)
         
         self.output_projection = nn.Sequential(
-            nn.Conv1d(residual_channels, residual_channels, 3),
+            nn.Conv1d(residual_channels, target_dim, 3),
             # nn.LeakyReLU(),
             # nn.Conv1d(residual_channels, target_dim, 3, padding="same"),
         )
diff --git a/pts/model/time_grad2/gaussian_diffusion_ou.py b/pts/model/time_grad2/gaussian_diffusion_ou.py
index 66c00ed..00eec7b 100644
--- a/pts/model/time_grad2/gaussian_diffusion_ou.py
+++ b/pts/model/time_grad2/gaussian_diffusion_ou.py
@@ -44,13 +44,18 @@ def get_ou(shape, mu=0, theta=0.1, sigma=.1):
     return OrnsteinUhlenbeckProcess(shape, mu=mu, theta=theta, sigma=sigma)
 
 def mk_noise(shape, device):
+    repeats = shape[1]  # number of independent draws = size of dim 1 of the requested shape
     shape = (shape[0], shape[2])
-    ou = get_ou(shape)
-    b = ou.sample()
-    b = [torch.from_numpy(bb).to(device).float() for bb in b]
-    C, noise_rand, noise_ou = b
-    noise_rand = noise_rand[:, None]
-    noise_ou = noise_ou[:, None]
+    ns = []  # one (C, noise_rand, noise_ou) triple per draw
+    for _ in range(repeats):
+        ou = get_ou(shape)  # new OrnsteinUhlenbeckProcess for the reduced (shape[0], shape[2]) shape
+        b = ou.sample()
+        b = [torch.from_numpy(bb).to(device).float() for bb in b]  # numpy arrays -> float32 tensors on `device`
+        C, noise_rand, noise_ou = b
+        noise_rand = noise_rand[:, None]  # insert an axis at dim 1 so the draws can be joined there
+        noise_ou = noise_ou[:, None]
+        ns.append((C, noise_rand, noise_ou))  # NOTE(review): C gets no extra axis; confirm concat on dim 1 suits it
+    C, noise_rand, noise_ou = [torch.concat(n, 1) for n in zip(*ns)]  # join each component across draws on dim 1
     return C, noise_rand, noise_ou
 
 def noise_like(x):    
@@ -222,7 +227,7 @@ class GaussianDiffusionOU(nn.Module):
         device = self.betas.device
 
         b = shape[0]
-        img = torch.randn(shape, device=device)
+        C, noise_rand, img = mk_noise(shape, device=device)
 
         for i in reversed(range(0, self.num_timesteps)):
             img = self.p_sample(
@@ -236,10 +241,10 @@ class GaussianDiffusionOU(nn.Module):
             shape = cond.shape[:-1] + (self.input_size,)
             # TODO reshape cond to (B*T, 1, -1)
             B, T, pred_len = cond.shape
-            shape = (B, 1, pred_len)
+            shape = (B, self.input_size, pred_len)
         else:
             shape = sample_shape
-        x_hat = self.p_sample_loop(shape, cond)  # TODO reshape x_hat to (B,T,-1)
+        x_hat = self.p_sample_loop(shape, cond)
 
         if self.scale is not None:
             x_hat *= self.scale
@@ -263,8 +268,8 @@ class GaussianDiffusionOU(nn.Module):
 
         return img
 
-    def q_sample(self, x_start, t, noise=None):
-        noise = default(noise, lambda: noise_like(x_start)[2])
+    def q_sample(self, x_start, t, noise):
+        # noise = default(noise, lambda: noise_like(x_start)[2])
 
         return (
             extract(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start
diff --git a/pts/model/time_grad2/time_grad_network.py b/pts/model/time_grad2/time_grad_network.py
index 1734965..77ec43d 100644
--- a/pts/model/time_grad2/time_grad_network.py
+++ b/pts/model/time_grad2/time_grad_network.py
@@ -255,19 +255,19 @@ class TimeGradTrainingNetwork2(nn.Module):
             past_observed_values, 1 - past_is_pad.unsqueeze(-1)
         )
 
-        if future_time_feat is None or future_target_cdf is None:
-            time_feat = past_time_feat[:, -self.context_length :, ...]
-            sequence = past_target_cdf
-            sequence_length = self.history_length
-            subsequences_length = self.context_length
-        else:
-            time_feat = torch.cat(
-                (past_time_feat[:, -self.context_length :, ...], future_time_feat),
-                dim=1,
-            )
-            sequence = torch.cat((past_target_cdf, future_target_cdf), dim=1)
-            sequence_length = self.history_length + self.prediction_length
-            subsequences_length = self.context_length + self.prediction_length
+        # if future_time_feat is None or future_target_cdf is None:
+        time_feat = past_time_feat[:, -self.context_length:, ...]
+        sequence = past_target_cdf
+        sequence_length = self.history_length
+        subsequences_length = self.context_length
+        # else:
+        #     time_feat = torch.cat(
+        #         (past_time_feat[:, -self.context_length :, ...], future_time_feat),
+        #         dim=1,
+        #     )
+        #     sequence = torch.cat((past_target_cdf, future_target_cdf), dim=1)
+        #     sequence_length = self.history_length + self.prediction_length
+        #     subsequences_length = self.context_length + self.prediction_length
 
         # (batch_size, sub_seq_len, target_dim, num_lags)
         lags = self.get_lagged_subsequences(
@@ -389,16 +389,17 @@ class TimeGradTrainingNetwork2(nn.Module):
 
         # put together target sequence
         # (batch_size, seq_len, target_dim)
-        target = torch.cat(
-            (past_target_cdf[:, -self.context_length :, ...], future_target_cdf),
-            dim=1,
-        )
+        # target = torch.cat(
+        #     (past_target_cdf[:, -self.context_length :, ...], future_target_cdf),
+        #     dim=1,
+        # )
+        target = future_target_cdf
 
         # assert_shape(target, (-1, seq_len, self.target_dim))
 
         distr_args = self.distr_args(rnn_outputs=rnn_outputs)
         if self.scaling:
-            self.diffusion.scale = scale
+            self.diffusion.scale = scale.permute(0, 2, 1)
 
         # we sum the last axis to have the same shape for all likelihoods
         # (batch_size, subseq_length, 1)
@@ -448,11 +449,8 @@ class TimeGradPredictionNetwork2(TimeGradTrainingNetwork2):
 
     def sampling_decoder(
         self,
-        past_target_cdf: torch.Tensor,
-        target_dimension_indicator: torch.Tensor,
-        time_feat: torch.Tensor,
         scale: torch.Tensor,
-        begin_states: Union[List[torch.Tensor], torch.Tensor],
+        rnn_outputs: Union[List[torch.Tensor], torch.Tensor],
     ) -> torch.Tensor:
         """
         Computes sample paths by unrolling the RNN starting with a initial
@@ -470,8 +468,8 @@ class TimeGradPredictionNetwork2(TimeGradTrainingNetwork2):
             num_features)
         scale
             Mean scale for each time series (batch_size, 1, target_dim)
-        begin_states
-            List of initial states for the RNN layers (batch_size, num_cells)
+        rnn_outputs
+            Outputs of the unrolled RNN (batch_size, seq_len, num_cells)
         Returns
         --------
         sample_paths : Tensor
@@ -482,56 +480,35 @@ class TimeGradPredictionNetwork2(TimeGradTrainingNetwork2):
         def repeat(tensor, dim=0):
             return tensor.repeat_interleave(repeats=self.num_parallel_samples, dim=dim)
 
-        # blows-up the dimension of each tensor to
-        # batch_size * self.num_sample_paths for increasing parallelism
-        repeated_past_target_cdf = repeat(past_target_cdf)
-        repeated_time_feat = repeat(time_feat)
         repeated_scale = repeat(scale)
         if self.scaling:
             self.diffusion.scale = repeated_scale
-        repeated_target_dimension_indicator = repeat(target_dimension_indicator)
-
-        if self.cell_type == "LSTM":
-            repeated_states = [repeat(s, dim=1) for s in begin_states]
-        else:
-            repeated_states = repeat(begin_states, dim=1)
-
+            
         future_samples = []
-
-        # for each future time-units we draw new samples for this time-unit
-        # and update the state
-        for k in range(self.prediction_length):
-            lags = self.get_lagged_subsequences(
-                sequence=repeated_past_target_cdf,
-                sequence_length=self.history_length + k,
-                indices=self.shifted_lags,
-                subsequences_length=1,
-            )
-
-            rnn_outputs, repeated_states, _, _ = self.unroll(
-                begin_state=repeated_states,
-                lags=lags,
-                scale=repeated_scale,
-                time_feat=repeated_time_feat[:, k : k + 1, ...],
-                target_dimension_indicator=repeated_target_dimension_indicator,
-                unroll_length=1,
-            )
-
+        for _ in range(self.num_parallel_samples):
             distr_args = self.distr_args(rnn_outputs=rnn_outputs)
-
             # (batch_size, 1, target_dim)
-            new_samples = self.diffusion.sample(cond=distr_args)
+            distr_args = distr_args.permute(0, 2, 1)
+            samples = self.diffusion.sample(cond=distr_args)
+            future_samples.append(samples)
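+        # Shapes noted during a debug run (TODO confirm which tensor each is):
+        #   rnn_outputs: torch.Size([7, 24, 40])
+        #   presumably the stacked samples before permute: torch.Size([7, 100, 370, 24])
+        #   presumably one diffusion sample: torch.Size([7, 370, 24])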
+        samples = torch.stack(future_samples, dim=1).permute(0, 1, 3, 2)
 
-            # (batch_size, seq_len, target_dim)
-            future_samples.append(new_samples)
-            repeated_past_target_cdf = torch.cat(
-                (repeated_past_target_cdf, new_samples), dim=1
-            )
+        # # (batch_size, seq_len, target_dim)
+        # future_samples.append(new_samples)
+        # repeated_past_target_cdf = torch.cat(
+        #     (repeated_past_target_cdf, new_samples), dim=1
+        # )
 
         # (batch_size * num_samples, prediction_length, target_dim)
-        samples = torch.cat(future_samples, dim=1)
+        # samples = torch.cat(future_samples, dim=1)
 
         # (batch_size, num_samples, prediction_length, target_dim)
+        # print('samples.shape', samples.shape)
+        return samples
         return samples.reshape(
             (
                 -1,
@@ -586,21 +563,20 @@ class TimeGradPredictionNetwork2(TimeGradTrainingNetwork2):
             past_observed_values, 1 - past_is_pad.unsqueeze(-1)
         )
 
-        # unroll the decoder in "prediction mode", i.e. with past data only
-        _, begin_states, scale, _, _ = self.unroll_encoder(
+        
+        # unroll the encoder without future targets (future_target_cdf=None);
+        # its RNN outputs are the conditioning input for the sampling decoder
+        rnn_outputs, _, scale, _, _ = self.unroll_encoder(
             past_time_feat=past_time_feat,
             past_target_cdf=past_target_cdf,
             past_observed_values=past_observed_values,
             past_is_pad=past_is_pad,
-            future_time_feat=None,
+            future_time_feat=future_time_feat,
             future_target_cdf=None,
             target_dimension_indicator=target_dimension_indicator,
         )
 
         return self.sampling_decoder(
-            past_target_cdf=past_target_cdf,
-            target_dimension_indicator=target_dimension_indicator,
-            time_feat=future_time_feat,
             scale=scale,
-            begin_states=begin_states,
+            rnn_outputs=rnn_outputs,
         )