Files
Run-Skeleton-Run/trying_ddpg_with_implicit_dynamics.ipynb
2018-01-19 12:08:49 +08:00

456 lines
15 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2018-01-19T02:03:04.176331Z",
"start_time": "2018-01-19T02:03:04.170597Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Process-wide environment, set in the first cell ahead of every later import.\n",
"os.environ.update({\n",
"    # An empty device list hides all GPUs from CUDA.\n",
"    'CUDA_VISIBLE_DEVICES': '',\n",
"    # NOTE(review): PYTHONPATH is only read at interpreter start-up, so this affects\n",
"    # interpreters launched from this process, not the running kernel's sys.path --\n",
"    # confirm that is the intent.\n",
"    'PYTHONPATH': '.',\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2018-01-19T02:03:04.586375Z",
"start_time": "2018-01-19T02:03:04.178204Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"# Turn on autoreload (mode 2) before pulling in the plotting helpers.\n",
"%reload_ext autoreload\n",
"%autoreload 2\n",
"# Inline figures plus the np/plt shorthand names; --no-import-all skips the star imports.\n",
"%pylab --no-import-all inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2018-01-19T02:03:04.627472Z",
"start_time": "2018-01-19T02:03:04.587944Z"
},
"scrolled": true
},
"outputs": [],
"source": [
"import sys\n",
"\n",
"# Command line for ddpg/train.py, presumably consumed by parse_args() in a later cell.\n",
"# It is spelled out as an explicit list (rather than splitting one long string on\n",
"# single spaces) so stray whitespace can never turn into empty arguments.\n",
"sys.argv = [\n",
"    'ddpg/train.py',\n",
"    '--logdir', './outputs/logs_ddpg2',\n",
"    '--num-threads', '3',\n",
"    '--num-train-threads', '3',\n",
"    '--reward-scale', '0.001',\n",
"    # actor flags\n",
"    '--actor-layers', '64-64-64',\n",
"    '--actor-layer-norm',\n",
"    '--actor-parameters-noise',\n",
"    '--actor-lr', '0.001',\n",
"    '--actor-lr-end', '0.000001',\n",
"    # dynamics flags\n",
"    '--dynamics-lr', '0.0001',\n",
"    '--dynamics-lr-end', '0.0000001',\n",
"    # critic flags\n",
"    '--critic-layers', '64-32',\n",
"    '--critic-layer-norm',\n",
"    '--critic-lr', '0.002',\n",
"    '--critic-lr-end', '0.000001',\n",
"    # remaining flags\n",
"    '--initial-epsilon', '0.5',\n",
"    '--final-epsilon', '0.001',\n",
"    '--tau', '0.0001',\n",
"]\n",
"sys.argv"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2018-01-19T02:03:03.852Z"
},
"collapsed": true
},
"outputs": [],
"source": [
"# Explicit imports instead of a star import, so every name the training cell uses\n",
"# has a visible origin.\n",
"import torch\n",
"\n",
"from ddpg.train import (\n",
"    create_act_update_fns,\n",
"    create_model,\n",
"    parse_args,\n",
"    play_single_thread,\n",
"    train,\n",
"    train_multi_thread,\n",
"    train_single_thread,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2018-01-19T02:03:03.855Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2018-01-19 10:03:05,421] Making new env: Pendulum-v0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Base (\n",
" (feature_net): LinearNet (\n",
" (net): Sequential (\n",
" (linear_0): NoisyLinear (3 -> 64)\n",
" (layer_norm_0): LayerNorm (\n",
" )\n",
" (act_0): ReLU ()\n",
" (linear_1): NoisyLinear (64 -> 64)\n",
" (layer_norm_1): LayerNorm (\n",
" )\n",
" (act_1): ReLU ()\n",
" (linear_2): NoisyLinear (64 -> 64)\n",
" (layer_norm_2): LayerNorm (\n",
" )\n",
" (act_2): ReLU ()\n",
" )\n",
" )\n",
")\n",
"ActorHead (\n",
" (base): Base (\n",
" (feature_net): LinearNet (\n",
" (net): Sequential (\n",
" (linear_0): NoisyLinear (3 -> 64)\n",
" (layer_norm_0): LayerNorm (\n",
" )\n",
" (act_0): ReLU ()\n",
" (linear_1): NoisyLinear (64 -> 64)\n",
" (layer_norm_1): LayerNorm (\n",
" )\n",
" (act_1): ReLU ()\n",
" (linear_2): NoisyLinear (64 -> 64)\n",
" (layer_norm_2): LayerNorm (\n",
" )\n",
" (act_2): ReLU ()\n",
" )\n",
" )\n",
" )\n",
" (policy_net): LinearNet (\n",
" (net): Sequential (\n",
" (linear_0): Linear (64 -> 1)\n",
" (act_0): ELU (alpha=1.0)\n",
" )\n",
" )\n",
")\n",
"CriticHead (\n",
" (base): Base (\n",
" (feature_net): LinearNet (\n",
" (net): Sequential (\n",
" (linear_0): NoisyLinear (3 -> 64)\n",
" (layer_norm_0): LayerNorm (\n",
" )\n",
" (act_0): ReLU ()\n",
" (linear_1): NoisyLinear (64 -> 64)\n",
" (layer_norm_1): LayerNorm (\n",
" )\n",
" (act_1): ReLU ()\n",
" (linear_2): NoisyLinear (64 -> 64)\n",
" (layer_norm_2): LayerNorm (\n",
" )\n",
" (act_2): ReLU ()\n",
" )\n",
" )\n",
" )\n",
" (value_net): Linear (64 -> 1)\n",
")\n",
"DynamicsHead (\n",
" (base): Base (\n",
" (feature_net): LinearNet (\n",
" (net): Sequential (\n",
" (linear_0): NoisyLinear (3 -> 64)\n",
" (layer_norm_0): LayerNorm (\n",
" )\n",
" (act_0): ReLU ()\n",
" (linear_1): NoisyLinear (64 -> 64)\n",
" (layer_norm_1): LayerNorm (\n",
" )\n",
" (act_1): ReLU ()\n",
" (linear_2): NoisyLinear (64 -> 64)\n",
" (layer_norm_2): LayerNorm (\n",
" )\n",
" (act_2): ReLU ()\n",
" )\n",
" )\n",
" )\n",
" (value_net): LinearNet (\n",
" (net): Sequential (\n",
" (linear_0): Linear (65 -> 64)\n",
" (layer_norm_0): LayerNorm (\n",
" )\n",
" (act_0): ReLU ()\n",
" )\n",
" )\n",
" (value_net2): Linear (64 -> 3)\n",
")\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2018-01-19 10:03:05,570] Making new env: Pendulum-v0\n",
"[2018-01-19 10:03:05,589] Making new env: Pendulum-v0\n",
"Process Process-1:\n",
"Traceback (most recent call last):\n",
" File \"/home/isisilon/.pyenv/versions/3.6.0/lib/python3.6/multiprocessing/process.py\", line 249, in _bootstrap\n",
" self.run()\n",
" File \"/home/isisilon/.pyenv/versions/3.6.0/lib/python3.6/multiprocessing/process.py\", line 93, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/media/isisilon/Data/My_Documents/Documents/eclipse-workspace/ddpg_implicit_state/Run-Skeleton-Run/ddpg/model.py\", line 440, in train_single_thread\n",
" logger.scalar_summary(key, value, update_step)\n",
" File \"/media/isisilon/Data/My_Documents/Documents/eclipse-workspace/ddpg_implicit_state/Run-Skeleton-Run/common/logger.py\", line 17, in scalar_summary\n",
" self.writer.add_scalar(tag, value, step)\n",
" File \"/home/isisilon/.pyenv/versions/3.6.0/envs/jupyter3/lib/python3.6/site-packages/tensorboardX/writer.py\", line 262, in add_scalar\n",
" self.file_writer.add_summary(scalar(tag, scalar_value), global_step)\n",
" File \"/home/isisilon/.pyenv/versions/3.6.0/envs/jupyter3/lib/python3.6/site-packages/tensorboardX/summary.py\", line 88, in scalar\n",
" assert(scalar.squeeze().ndim==0), 'scalar should be 0D'\n",
"AttributeError: 'NoneType' object has no attribute 'squeeze'\n",
"[2018-01-19 10:04:01,980] Making new env: Pendulum-v0\n",
"[2018-01-19 10:04:05,897] Making new env: Pendulum-v0\n",
"[2018-01-19 10:04:56,915] Making new env: Pendulum-v0\n",
"[2018-01-19 10:04:59,842] Making new env: Pendulum-v0\n",
"[2018-01-19 10:05:51,136] Making new env: Pendulum-v0\n",
"[2018-01-19 10:05:54,795] Making new env: Pendulum-v0\n",
"[2018-01-19 10:06:58,471] Making new env: Pendulum-v0\n",
"[2018-01-19 10:07:01,303] Making new env: Pendulum-v0\n",
"[2018-01-19 10:08:06,776] Making new env: Pendulum-v0\n",
"[2018-01-19 10:08:11,823] Making new env: Pendulum-v0\n",
"[2018-01-19 10:09:24,555] Making new env: Pendulum-v0\n",
"[2018-01-19 10:09:28,326] Making new env: Pendulum-v0\n",
"[2018-01-19 10:10:25,677] Making new env: Pendulum-v0\n",
"[2018-01-19 10:10:28,134] Making new env: Pendulum-v0\n",
"[2018-01-19 10:11:21,746] Making new env: Pendulum-v0\n",
"[2018-01-19 10:11:22,767] Making new env: Pendulum-v0\n",
"[2018-01-19 10:12:19,668] Making new env: Pendulum-v0\n",
"[2018-01-19 10:12:20,498] Making new env: Pendulum-v0\n",
"[2018-01-19 10:13:16,087] Making new env: Pendulum-v0\n",
"[2018-01-19 10:13:19,701] Making new env: Pendulum-v0\n",
"[2018-01-19 10:14:20,287] Making new env: Pendulum-v0\n",
"[2018-01-19 10:14:25,255] Making new env: Pendulum-v0\n",
"[2018-01-19 10:15:25,534] Making new env: Pendulum-v0\n",
"[2018-01-19 10:15:30,307] Making new env: Pendulum-v0\n",
"[2018-01-19 10:16:24,529] Making new env: Pendulum-v0\n",
"[2018-01-19 10:16:27,554] Making new env: Pendulum-v0\n",
"[2018-01-19 10:17:23,602] Making new env: Pendulum-v0\n",
"[2018-01-19 10:17:24,952] Making new env: Pendulum-v0\n",
"[2018-01-19 10:18:19,517] Making new env: Pendulum-v0\n",
"[2018-01-19 10:18:21,783] Making new env: Pendulum-v0\n",
"[2018-01-19 10:19:17,384] Making new env: Pendulum-v0\n",
"[2018-01-19 10:19:19,827] Making new env: Pendulum-v0\n",
"[2018-01-19 10:20:15,270] Making new env: Pendulum-v0\n",
"[2018-01-19 10:20:15,364] Making new env: Pendulum-v0\n",
"[2018-01-19 10:21:09,305] Making new env: Pendulum-v0\n",
"[2018-01-19 10:21:12,535] Making new env: Pendulum-v0\n",
"[2018-01-19 10:22:03,694] Making new env: Pendulum-v0\n",
"[2018-01-19 10:22:05,211] Making new env: Pendulum-v0\n",
"[2018-01-19 10:22:59,166] Making new env: Pendulum-v0\n",
"[2018-01-19 10:23:03,510] Making new env: Pendulum-v0\n",
"[2018-01-19 10:23:52,806] Making new env: Pendulum-v0\n",
"[2018-01-19 10:23:58,895] Making new env: Pendulum-v0\n",
"[2018-01-19 10:24:44,687] Making new env: Pendulum-v0\n",
"[2018-01-19 10:24:52,964] Making new env: Pendulum-v0\n",
"[2018-01-19 10:25:37,681] Making new env: Pendulum-v0\n",
"[2018-01-19 10:25:51,033] Making new env: Pendulum-v0\n",
"[2018-01-19 10:26:34,755] Making new env: Pendulum-v0\n",
"[2018-01-19 10:26:45,860] Making new env: Pendulum-v0\n",
"[2018-01-19 10:27:27,254] Making new env: Pendulum-v0\n",
"[2018-01-19 10:27:44,223] Making new env: Pendulum-v0\n",
"[2018-01-19 10:28:25,083] Making new env: Pendulum-v0\n",
"[2018-01-19 10:28:40,452] Making new env: Pendulum-v0\n",
"[2018-01-19 10:29:21,854] Making new env: Pendulum-v0\n",
"[2018-01-19 10:29:40,183] Making new env: Pendulum-v0\n",
"[2018-01-19 10:30:20,224] Making new env: Pendulum-v0\n",
"[2018-01-19 10:30:43,777] Making new env: Pendulum-v0\n",
"[2018-01-19 10:31:21,376] Making new env: Pendulum-v0\n",
"[2018-01-19 10:31:43,822] Making new env: Pendulum-v0\n",
"[2018-01-19 10:32:19,617] Making new env: Pendulum-v0\n",
"[2018-01-19 10:32:42,093] Making new env: Pendulum-v0\n",
"[2018-01-19 10:33:13,267] Making new env: Pendulum-v0\n",
"[2018-01-19 10:33:38,685] Making new env: Pendulum-v0\n",
"[2018-01-19 10:34:08,173] Making new env: Pendulum-v0\n",
"[2018-01-19 10:34:36,100] Making new env: Pendulum-v0\n",
"[2018-01-19 10:35:03,554] Making new env: Pendulum-v0\n",
"[2018-01-19 10:35:31,869] Making new env: Pendulum-v0\n",
"[2018-01-19 10:36:02,086] Making new env: Pendulum-v0\n",
"[2018-01-19 10:36:30,077] Making new env: Pendulum-v0\n",
"[2018-01-19 10:37:04,189] Making new env: Pendulum-v0\n",
"[2018-01-19 10:37:33,748] Making new env: Pendulum-v0\n",
"[2018-01-19 10:38:03,424] Making new env: Pendulum-v0\n",
"[2018-01-19 10:38:35,036] Making new env: Pendulum-v0\n"
]
}
],
"source": [
"# Thread budget: OMP_NUM_THREADS is exported through the environment, while torch's\n",
"# own intra-op pool in this process is capped at one thread.\n",
"os.environ['OMP_NUM_THREADS'] = '3'\n",
"torch.set_num_threads(1)\n",
"\n",
"args = parse_args()\n",
"\n",
"# Callables passed to train() after args, in the original positional order.\n",
"training_callbacks = (\n",
"    create_model,\n",
"    create_act_update_fns,\n",
"    train_multi_thread,\n",
"    train_single_thread,\n",
"    play_single_thread,\n",
")\n",
"train(args, *training_callbacks)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2018-01-19T02:01:33.149379Z",
"start_time": "2018-01-19T02:01:30.923966Z"
}
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"start_time": "2018-01-18T08:39:00.929Z"
},
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "jupyter3",
"language": "python",
"name": "jupyter3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
},
"toc": {
"colors": {
"hover_highlight": "#DAA520",
"navigate_num": "#000000",
"navigate_text": "#333333",
"running_highlight": "#FF0000",
"selected_highlight": "#FFD700",
"sidebar_border": "#EEEEEE",
"wrapper_background": "#FFFFFF"
},
"moveMenuLeft": true,
"nav_menu": {
"height": "12px",
"width": "252px"
},
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 4,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false,
"widenNotebook": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}