diff --git a/Makefile b/Makefile index 8557c41..6c241be 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,14 @@ LOGURU_LEVEL=INFO run: ulimit -S -m 65000000 ulimit -S -v 65000000 - LOGURU_LEVEL=INFO ${python} main.py --cuda --automatic_entropy_tuning true --replay_size 50000 --load auto + LOGURU_LEVEL=INFO ${python} \ + -m pdb -c continue \ + main.py \ + --cuda \ + --automatic_entropy_tuning true \ + --replay_size 10000 \ + --demonstrations data/demonstrations \ + # --load auto \ # ${python} -m pdb main.py --cuda --automatic_entropy_tuning true --replay_size 10000 --load auto --start_steps 200 # LOGURU_LEVEL=INFO ${python} main.py --demonstrations data/demonstrations --cuda --automatic_entropy_tuning true --replay_size 20000 --load auto # LOGURU_LEVEL=INFO ${python} main.py --demonstrations data/demonstrations --cuda --updates_per_step 2 --load auto --alpha 0.1 --tau 1 --target_update_interval 1000 diff --git a/main.py b/main.py index 5bd55f4..db185c1 100644 --- a/main.py +++ b/main.py @@ -12,7 +12,6 @@ from load_demonstrations import load_demonstrations import apple_gym.env import pickle from process_obs import ProcessObservation -# from torchinfo import summary from torch.utils.tensorboard import SummaryWriter from progress import RichTQDM @@ -98,8 +97,14 @@ logger.info(f"process_obs reduces obs_space {env.observation_space.shape[0]}-{pr # Agent agent = SAC(observation_dim, env.action_space, args, process_obs) -# TODO -# summary(model, input_size=(batch_size, 1, 28, 28)) +# from torchinfo import summary +# print('process_obs') +# summary(process_obs, input_size=(2, *env.observation_space.shape), depth=2) +# print('critic') +# summary(agent.critic, input_size=((2, observation_dim), (2, action_dim))) +# print('policy') +# summary(agent.policy, input_size=(2, observation_dim)) +# # print(process_obs, agent.critic, agent.policy) #Tensorboard log_name = '{}_SAC_{}_{}_{}'.format(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), args.env_name, @@ -117,13 +122,21 @@ def save(save_dir): try: save_dir.mkdir(exist_ok=True) logger.info(f'Saving to {save_dir}') - agent.save_model(save_dir/'actor.pkl', save_dir/'critic.pkl') + agent.save_model( + save_dir / 'actor.pkl', + save_dir / 'critic.pkl', + save_dir / 'process_obs.pkl' + ) # memory.save(save_dir / 'memory.pkl') # crashes at over 200k except Exception as e: logging.exception("failed to save") def load(save_dir): - agent.load_model(save_dir / 'actor.pkl', save_dir / 'critic.pkl') + agent.load_model( + save_dir / 'actor.pkl', + save_dir / 'critic.pkl', + save_dir / 'process_obs.pkl' + ) # if args.train: # memory.load(save_dir/'memory.pkl') @@ -145,10 +158,9 @@ updates = 0 with RichTQDM() as prog: task1 = prog.add_task("[red]steps", total=args.num_steps) - task2 = prog.add_task("[red]updates", total=args.num_steps) - task3 = prog.add_task("[red]test", total=args.num_steps) + task2 = prog.add_task("[blue]updates", total=args.num_steps) + task3 = prog.add_task("[green]test", total=args.num_steps) for i_episode in itertools.count(0): - print('1') episode_reward = 0 episode_steps = 0 done = False @@ -160,7 +172,7 @@ with RichTQDM() as prog: else: action = agent.select_action(state) # Sample action from policy - if len(memory) > args.batch_size: + if len(memory) > args.batch_size and (total_numsteps%20==0): # Number of updates per step in environment for i in range(args.updates_per_step): # Update parameters of all the networks diff --git a/process_obs.py b/process_obs.py index 5aae4fe..6ef5825 100644 --- a/process_obs.py +++ b/process_obs.py @@ -122,7 +122,7 @@ class ProcessObservation(nn.Module): os.path.dirname(os.path.abspath(__file__)), 'data/nets/cornell-randsplit-rgbd-grconvnet3-drop1-ch16/epoch_30_iou_0.97.pt' ) - self.feature_extractor = GenerativeResnet3Headless().half() + self.feature_extractor = GenerativeResnet3Headless().train().half() self.feature_extractor.load_state_dict(state_dict=torch.load(grconvnet3_path), strict=False) old_img_size = (res[0], res[1], 8) @@ -146,11 +146,11 @@ class ProcessObservation(nn.Module): # make a batch x = torch.cat([base_rgbd, arm_rgbd], 0) x = x.permute((0, 3, 1, 2)) # to ((-1, 4, x, y)) - x = x.half() h = self.feature_extractor(x) # undo fake batch base_h, arm_h = h[:bs].reshape((bs, -1)), h[bs:].reshape((bs, -1)) # add features together y = torch.cat([others, base_h, arm_h], 1) + assert torch.isfinite(y).all() return y diff --git a/replay_memory.py b/replay_memory.py index 61f9b81..d69f260 100644 --- a/replay_memory.py +++ b/replay_memory.py @@ -4,7 +4,7 @@ import torch import hickle import os from loguru import logger - +# import bcolz import lz4.frame import cloudpickle as pickle @@ -56,40 +56,40 @@ class ReplayMemory: self.position = len(self.buffer) -class ReplayMemory2: - def __init__(self, capacity, seed, observation_dim, action_dim): - random.seed(seed) - self.capacity = capacity - self._observations = np.zeros((capacity, observation_dim), dtype='float16') - self._actions = np.zeros((capacity, action_dim)) - self._rewards = np.zeros((capacity, 1)) - self._next_obs = np.zeros((capacity, observation_dim), dtype='float16') - self._terminals = np.zeros((capacity, 1), dtype='uint8') - self.position = 0 - self._size = 0 +# class ReplayMemory: +# def __init__(self, capacity, seed, observation_dim, action_dim): +# random.seed(seed) +# self.capacity = capacity +# self._observations = (bcolz.zeros((capacity, observation_dim), dtype='float16')) +# self._actions = (bcolz.zeros((capacity, action_dim))) +# self._rewards = (bcolz.zeros((capacity, 1))) +# self._next_obs = (bcolz.zeros((capacity, observation_dim), dtype='float16')) +# self._terminals = (bcolz.zeros((capacity, 1), dtype='uint8')) +# self.position = 0 +# self._size = 0 - def push(self, state, action, reward, next_state, done): - self._observations[self.position] = state - self._actions[self.position] = action - self._rewards[self.position] = reward - self._next_obs[self.position] = next_state - self._terminals[self.position] = done - self.position = (self.position + 1) % self.capacity - if self._size