diff --git a/README.md b/README.md index fd97271..8018513 100644 --- a/README.md +++ b/README.md @@ -36,31 +36,46 @@ python main.py --env-name Humanoid-v2 --alpha 0.05 --tau 1 --target_update_inter python main.py --env-name Humanoid-v2 --policy Deterministic --tau 1 --target_update_interval 1000 ``` -### Default Parameters +### Default Arguments ------------- +``` +usage: main.py [-h] [--env-name ENV_NAME] [--policy POLICY] [--eval EVAL] + [--gamma G] [--tau G] [--lr G] [--alpha G] + [--automatic_entropy_tuning G] [--seed N] [--batch_size N] + [--num_steps N] [--hidden_size N] [--updates_per_step N] + [--start_steps N] [--target_update_interval N] + [--replay_size N] [--cuda] -| Parameters | Value | -| --------------- | ------------- | -|**Shared**|-| -| optimizer | Adam | -| learning rate(`--lr`) | 3x10−4 | -| discount(`--gamma`) (γ) | 0.99 | -| replay buffer size(`--replay_size`) | 1x106 | -| automatic_entropy_tuning(`--automatic_entropy_tuning`)|False| -|number of hidden layers (all networks)|2| -|number of hidden units per layer(`--hidden_size`)|256| -|number of samples per minibatch(`--batch_size`)|256| -|nonlinearity|ReLU| -|**SAC**|-| -|target smoothing coefficient(`--tau`) (τ)|0.005| -|target update interval(`--target_update_interval`)|1| -|gradient steps(`--updates_per_step`)|1| -|**SAC** *(Hard Update)*|-| -|target smoothing coefficient(`--tau`) (τ)|1| -|target update interval(`--target_update_interval`)|1000| -|gradient steps (except humanoids)(`--updates_per_step`)|4| -|gradient steps (humanoids)(`--updates_per_step`)|1| +PyTorch Soft Actor-Critic Args + +optional arguments: + -h, --help show this help message and exit + --env-name ENV_NAME Mujoco Gym environment (default: HalfCheetah-v2) + --policy POLICY Policy Type: Gaussian | Deterministic (default: + Gaussian) + --eval EVAL Evaluates a policy every 10 episodes (default: + True) + --gamma G discount factor for reward (default: 0.99) + --tau G target smoothing coefficient(τ) (default: 0.005) 
+ --lr G learning rate (default: 0.0003) + --alpha G Temperature parameter α determines the relative + importance of the entropy term against the reward + (default: 0.2) + --automatic_entropy_tuning G + Automatically adjust α (default: False) + --seed N random seed (default: 123456) + --batch_size N batch size (default: 256) + --num_steps N maximum number of steps (default: 1000001) + --hidden_size N hidden size (default: 256) + --updates_per_step N model updates per simulator step (default: 1) + --start_steps N Steps sampling random actions (default: 10000) + --target_update_interval N + Value target update per no. of updates per step + (default: 1) + --replay_size N size of replay buffer (default: 1000000) + --cuda run on CUDA (default: False) +``` ------------ diff --git a/main.py b/main.py index 344e5aa..7fca690 100644 --- a/main.py +++ b/main.py @@ -8,13 +8,13 @@ from sac import SAC from tensorboardX import SummaryWriter from replay_memory import ReplayMemory -parser = argparse.ArgumentParser(description='PyTorch REINFORCE example') +parser = argparse.ArgumentParser(description='PyTorch Soft Actor-Critic Args') parser.add_argument('--env-name', default="HalfCheetah-v2", - help='name of the environment to run') + help='Mujoco Gym environment (default: HalfCheetah-v2)') parser.add_argument('--policy', default="Gaussian", - help='algorithm to use: Gaussian | Deterministic') + help='Policy Type: Gaussian | Deterministic (default: Gaussian)') parser.add_argument('--eval', type=bool, default=True, - help='Evaluates a policy a policy every 10 episode (default:True)') + help='Evaluates a policy a policy every 10 episode (default: True)') parser.add_argument('--gamma', type=float, default=0.99, metavar='G', help='discount factor for reward (default: 0.99)') parser.add_argument('--tau', type=float, default=0.005, metavar='G', @@ -22,11 +22,12 @@ parser.add_argument('--tau', type=float, default=0.005, metavar='G', parser.add_argument('--lr', type=float, default=0.0003, metavar='G', 
help='learning rate (default: 0.0003)') parser.add_argument('--alpha', type=float, default=0.2, metavar='G', - help='Temperature parameter α determines the relative importance of the entropy term against the reward (default: 0.2)') + help='Temperature parameter α determines the relative importance of the entropy\ + term against the reward (default: 0.2)') parser.add_argument('--automatic_entropy_tuning', type=bool, default=False, metavar='G', - help='Temperature parameter α automaically adjusted.') -parser.add_argument('--seed', type=int, default=456, metavar='N', - help='random seed (default: 456)') + help='Automaically adjust α (default: False)') +parser.add_argument('--seed', type=int, default=123456, metavar='N', + help='random seed (default: 123456)') parser.add_argument('--batch_size', type=int, default=256, metavar='N', help='batch size (default: 256)') parser.add_argument('--num_steps', type=int, default=1000001, metavar='N',