mirror of
https://github.com/wassname/pytorch-soft-actor-critic.git
synced 2026-06-27 16:46:28 +08:00
Edit README.md & main.py
This commit is contained in:
@@ -36,31 +36,46 @@ python main.py --env-name Humanoid-v2 --alpha 0.05 --tau 1 --target_update_inter
|
||||
python main.py --env-name Humanoid-v2 --policy Deterministic --tau 1 --target_update_interval 1000
|
||||
```
|
||||
|
||||
### Default Parameters
|
||||
### Default Arguments
|
||||
-------------
|
||||
|
||||
```
|
||||
usage: main.py [-h] [--env-name ENV_NAME] [--policy POLICY] [--eval EVAL]
|
||||
[--gamma G] [--tau G] [--lr G] [--alpha G]
|
||||
[--automatic_entropy_tuning G] [--seed N] [--batch_size N]
|
||||
[--num_steps N] [--hidden_size N] [--updates_per_step N]
|
||||
[--start_steps N] [--target_update_interval N]
|
||||
[--replay_size N] [--cuda]
|
||||
|
||||
| Parameters | Value |
|
||||
| --------------- | ------------- |
|
||||
|**Shared**|-|
|
||||
| optimizer | Adam |
|
||||
| learning rate(`--lr`) | 3x10<sup>−4</sup> |
|
||||
| discount(`--gamma`) (γ) | 0.99 |
|
||||
| replay buffer size(`--replay_size`) | 1x10<sup>6</sup> |
|
||||
| automatic_entropy_tuning(`--automatic_entropy_tuning`)|False|
|
||||
|number of hidden layers (all networks)|2|
|
||||
|number of hidden units per layer(`--hidden_size`)|256|
|
||||
|number of samples per minibatch(`--batch_size`)|256|
|
||||
|nonlinearity|ReLU|
|
||||
|**SAC**|-|
|
||||
|target smoothing coefficient(`--tau`) (τ)|0.005|
|
||||
|target update interval(`--target_update_interval`)|1|
|
||||
|gradient steps(`--updates_per_step`)|1|
|
||||
|**SAC** *(Hard Update)*|-|
|
||||
|target smoothing coefficient(`--tau`) (τ)|1|
|
||||
|target update interval(`--target_update_interval`)|1000|
|
||||
|gradient steps (except humanoids)(`--updates_per_step`)|4|
|
||||
|gradient steps (humanoids)(`--updates_per_step`)|1|
|
||||
PyTorch Soft Actor-Critic Args
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--env-name ENV_NAME Mujoco Gym environment (default: HalfCheetah-v2)
|
||||
--policy POLICY Policy Type: Gaussian | Deterministic (default:
|
||||
Gaussian)
|
||||
--eval EVAL Evaluates a policy every 10 episodes (default:
|
||||
True)
|
||||
--gamma G discount factor for reward (default: 0.99)
|
||||
--tau G target smoothing coefficient(τ) (default: 0.005)
|
||||
--lr G learning rate (default: 0.0003)
|
||||
--alpha G Temperature parameter α determines the relative
|
||||
importance of the entropy term against the reward
|
||||
(default: 0.2)
|
||||
--automatic_entropy_tuning G
|
||||
Automatically adjust α (default: False)
|
||||
--seed N random seed (default: 123456)
|
||||
--batch_size N batch size (default: 256)
|
||||
--num_steps N maximum number of steps (default: 1000000)
|
||||
--hidden_size N hidden size (default: 256)
|
||||
--updates_per_step N model updates per simulator step (default: 1)
|
||||
--start_steps N Steps sampling random actions (default: 10000)
|
||||
--target_update_interval N
|
||||
Value target update per no. of updates per step
|
||||
(default: 1)
|
||||
--replay_size N size of replay buffer (default: 1000000)
|
||||
--cuda run on CUDA (default: False)
|
||||
```
|
||||
|
||||
------------
|
||||
|
||||
|
||||
@@ -8,13 +8,13 @@ from sac import SAC
|
||||
from tensorboardX import SummaryWriter
|
||||
from replay_memory import ReplayMemory
|
||||
|
||||
parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
|
||||
parser = argparse.ArgumentParser(description='PyTorch Soft Actor-Critic Args')
|
||||
parser.add_argument('--env-name', default="HalfCheetah-v2",
|
||||
help='name of the environment to run')
|
||||
help='Mujoco Gym environment (default: HalfCheetah-v2)')
|
||||
parser.add_argument('--policy', default="Gaussian",
|
||||
help='algorithm to use: Gaussian | Deterministic')
|
||||
help='Policy Type: Gaussian | Deterministic (default: Gaussian)')
|
||||
parser.add_argument('--eval', type=bool, default=True,
|
||||
help='Evaluates a policy a policy every 10 episode (default:True)')
|
||||
help='Evaluates a policy a policy every 10 episode (default: True)')
|
||||
parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
|
||||
help='discount factor for reward (default: 0.99)')
|
||||
parser.add_argument('--tau', type=float, default=0.005, metavar='G',
|
||||
@@ -22,11 +22,12 @@ parser.add_argument('--tau', type=float, default=0.005, metavar='G',
|
||||
parser.add_argument('--lr', type=float, default=0.0003, metavar='G',
|
||||
help='learning rate (default: 0.0003)')
|
||||
parser.add_argument('--alpha', type=float, default=0.2, metavar='G',
|
||||
help='Temperature parameter α determines the relative importance of the entropy term against the reward (default: 0.2)')
|
||||
help='Temperature parameter α determines the relative importance of the entropy\
|
||||
term against the reward (default: 0.2)')
|
||||
parser.add_argument('--automatic_entropy_tuning', type=bool, default=False, metavar='G',
|
||||
help='Temperature parameter α automaically adjusted.')
|
||||
parser.add_argument('--seed', type=int, default=456, metavar='N',
|
||||
help='random seed (default: 456)')
|
||||
help='Automaically adjust α (default: False)')
|
||||
parser.add_argument('--seed', type=int, default=123456, metavar='N',
|
||||
help='random seed (default: 123456)')
|
||||
parser.add_argument('--batch_size', type=int, default=256, metavar='N',
|
||||
help='batch size (default: 256)')
|
||||
parser.add_argument('--num_steps', type=int, default=1000001, metavar='N',
|
||||
|
||||
Reference in New Issue
Block a user