diff --git a/README.md b/README.md
index fd97271..8018513 100644
--- a/README.md
+++ b/README.md
@@ -36,31 +36,46 @@ python main.py --env-name Humanoid-v2 --alpha 0.05 --tau 1 --target_update_inter
python main.py --env-name Humanoid-v2 --policy Deterministic --tau 1 --target_update_interval 1000
```
-### Default Parameters
+### Default Arguments
-------------
+```
+usage: main.py [-h] [--env-name ENV_NAME] [--policy POLICY] [--eval EVAL]
+ [--gamma G] [--tau G] [--lr G] [--alpha G]
+ [--automatic_entropy_tuning G] [--seed N] [--batch_size N]
+ [--num_steps N] [--hidden_size N] [--updates_per_step N]
+ [--start_steps N] [--target_update_interval N]
+ [--replay_size N] [--cuda]
-| Parameters | Value |
-| --------------- | ------------- |
-|**Shared**|-|
-| optimizer | Adam |
-| learning rate(`--lr`) | 3x10−4 |
-| discount(`--gamma`) (γ) | 0.99 |
-| replay buffer size(`--replay_size`) | 1x106 |
-| automatic_entropy_tuning(`--automatic_entropy_tuning`)|False|
-|number of hidden layers (all networks)|2|
-|number of hidden units per layer(`--hidden_size`)|256|
-|number of samples per minibatch(`--batch_size`)|256|
-|nonlinearity|ReLU|
-|**SAC**|-|
-|target smoothing coefficient(`--tau`) (τ)|0.005|
-|target update interval(`--target_update_interval`)|1|
-|gradient steps(`--updates_per_step`)|1|
-|**SAC** *(Hard Update)*|-|
-|target smoothing coefficient(`--tau`) (τ)|1|
-|target update interval(`--target_update_interval`)|1000|
-|gradient steps (except humanoids)(`--updates_per_step`)|4|
-|gradient steps (humanoids)(`--updates_per_step`)|1|
+PyTorch Soft Actor-Critic Args
+
+optional arguments:
+ -h, --help show this help message and exit
+ --env-name ENV_NAME Mujoco Gym environment (default: HalfCheetah-v2)
+ --policy POLICY Policy Type: Gaussian | Deterministic (default:
+ Gaussian)
+ --eval EVAL Evaluates a policy every 10 episodes (default:
+ True)
+ --gamma G discount factor for reward (default: 0.99)
+ --tau G target smoothing coefficient(τ) (default: 0.005)
+ --lr G learning rate (default: 0.0003)
+ --alpha G Temperature parameter α determines the relative
+ importance of the entropy term against the reward
+ (default: 0.2)
+ --automatic_entropy_tuning G
+ Automatically adjust α (default: False)
+ --seed N random seed (default: 123456)
+ --batch_size N batch size (default: 256)
+ --num_steps N maximum number of steps (default: 1000000)
+ --hidden_size N hidden size (default: 256)
+ --updates_per_step N model updates per simulator step (default: 1)
+ --start_steps N Steps sampling random actions (default: 10000)
+ --target_update_interval N
+ Value target update per no. of updates per step
+ (default: 1)
+ --replay_size N size of replay buffer (default: 1000000)
+ --cuda run on CUDA (default: False)
+```
------------
diff --git a/main.py b/main.py
index 344e5aa..7fca690 100644
--- a/main.py
+++ b/main.py
@@ -8,13 +8,13 @@ from sac import SAC
from tensorboardX import SummaryWriter
from replay_memory import ReplayMemory
-parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
+parser = argparse.ArgumentParser(description='PyTorch Soft Actor-Critic Args')
parser.add_argument('--env-name', default="HalfCheetah-v2",
- help='name of the environment to run')
+ help='Mujoco Gym environment (default: HalfCheetah-v2)')
parser.add_argument('--policy', default="Gaussian",
- help='algorithm to use: Gaussian | Deterministic')
+ help='Policy Type: Gaussian | Deterministic (default: Gaussian)')
parser.add_argument('--eval', type=bool, default=True,
- help='Evaluates a policy a policy every 10 episode (default:True)')
+ help='Evaluates a policy a policy every 10 episode (default: True)')
parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
help='discount factor for reward (default: 0.99)')
parser.add_argument('--tau', type=float, default=0.005, metavar='G',
@@ -22,11 +22,12 @@ parser.add_argument('--tau', type=float, default=0.005, metavar='G',
parser.add_argument('--lr', type=float, default=0.0003, metavar='G',
help='learning rate (default: 0.0003)')
parser.add_argument('--alpha', type=float, default=0.2, metavar='G',
- help='Temperature parameter α determines the relative importance of the entropy term against the reward (default: 0.2)')
+ help='Temperature parameter α determines the relative importance of the entropy\
+ term against the reward (default: 0.2)')
parser.add_argument('--automatic_entropy_tuning', type=bool, default=False, metavar='G',
- help='Temperature parameter α automaically adjusted.')
-parser.add_argument('--seed', type=int, default=456, metavar='N',
- help='random seed (default: 456)')
+ help='Automaically adjust α (default: False)')
+parser.add_argument('--seed', type=int, default=123456, metavar='N',
+ help='random seed (default: 123456)')
parser.add_argument('--batch_size', type=int, default=256, metavar='N',
help='batch size (default: 256)')
parser.add_argument('--num_steps', type=int, default=1000001, metavar='N',