diff --git a/Makefile b/Makefile
index 76c1ecf..44d303d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,12 @@
 python=/home/wassname/anaconda/envs/diygym3/bin/python
 date=2021-01-03_13-30-07
 LOGURU_LEVEL=INFO
+# ulimit -S -m 35000000
+# ulimit -S -v 35000000
+
 run:
-	LOGURU_LEVEL=INFO ${python} main.py --demonstrations data/demonstrations --cuda --updates_per_step 4 --automatic_entropy_tuning true
+	LOGURU_LEVEL=INFO ${python} -m pdb main.py  --cuda --automatic_entropy_tuning true --replay_size 15000 --load auto
+	# LOGURU_LEVEL=INFO ${python} main.py  --demonstrations data/demonstrations --cuda --automatic_entropy_tuning true --replay_size 20000 --load auto
 	# LOGURU_LEVEL=INFO ${python} main.py --demonstrations data/demonstrations --cuda --updates_per_step 2 --load auto --alpha 0.1 --tau 1 --target_update_interval 1000
 	# LOGURU_LEVEL=INFO ${python} main.py --demonstrations data/demonstrations --cuda --updates_per_step 2 --load auto --tau 1 --target_update_interval 1000 --policy Deterministic
 	
diff --git a/README.md b/README.md
index c3d5587..03024d8 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,10 @@
 Modified for wassname's apple gym
 
+changes:
+- save
+- process_obs with grconvnet
+- logging
+
 make run
 make play
 
diff --git a/main.py b/main.py
index 440c003..d3dc7aa 100644
--- a/main.py
+++ b/main.py
@@ -7,7 +7,6 @@ from pathlib import Path
 import logging
 import torch
 from sac import SAC
-from torch.utils.tensorboard import SummaryWriter
 from replay_memory import ReplayMemory
 from load_demonstrations import load_demonstrations
 import apple_gym.env
@@ -15,11 +14,25 @@ import pickle
 from process_obs import ProcessObservation
 # from torchinfo import summary
 from tqdm.auto import tqdm
+from torch.utils.tensorboard import SummaryWriter
 
 
 from loguru import logger
 from rich import print
 from rich.logging import RichHandler
+from rich.progress import (
+    ProgressColumn,
+    BarColumn,
+    DownloadColumn,
+    TextColumn,
+    TransferSpeedColumn,
+    TimeRemainingColumn,
+    Progress,
+    TaskID,
+    TimeElapsedColumn,
+    SpinnerColumn,
+    Text
+)
 logging.basicConfig(level=logging.INFO, handlers=[RichHandler(rich_tracebacks=True, markup=True)])
 logger.configure(handlers=[{"sink": RichHandler(markup=True),
                          "format": "{message}"}])
@@ -89,13 +102,15 @@ torch.manual_seed(args.seed)
 np.random.seed(args.seed)
 
 # A visual network
-observation_space=env.observation_space.shape[0] 
+action_dim = env.action_space.shape[0]
+observation_dim=env.observation_space.shape[0] 
 process_obs=ProcessObservation()
-observation_space=observation_space - process_obs.reduce_action_space
-logger.info(f"process_obs reduces obs_space {env.observation_space.shape[0]}-{process_obs.reduce_action_space}={observation_space}")
+observation_dim=observation_dim - process_obs.reduce_obs_space
+
+logger.info(f"process_obs reduces obs_space {env.observation_space.shape[0]}-{process_obs.reduce_obs_space}={observation_dim}")
 
 # Agent
-agent = SAC(observation_space, env.action_space, args, process_obs)
+agent = SAC(observation_dim, env.action_space, args, process_obs)
 
 # TODO
 # summary(model, input_size=(batch_size, 1, 28, 28))
@@ -109,7 +124,7 @@ logger.info(f"log name {log_name}")
 save_dir=Path("models") / log_name
 
 # Memory
-memory=ReplayMemory(args.replay_size, args.seed)
+memory=ReplayMemory(args.replay_size, args.seed, env.observation_space.shape[0], action_dim)
 
 
 def save(save_dir):
@@ -141,7 +156,32 @@ if args.demonstrations:
 total_numsteps = 0
 updates = 0
 
-with tqdm(unit='steps', mininterval=5) as prog:
+class SpeedColumn(ProgressColumn):
+    """Progress column that shows a task's estimated rate in iterations per second."""
+
+    def render(self, task: "Task") -> Text:
+        """Return the rate formatted as ``N.NN it/s``, or ``?`` when ``task.speed`` is None."""
+        rate = task.speed
+        # A None speed is rendered as a placeholder instead of a number.
+        label = "?" if rate is None else f"{rate:2.2f} it/s"
+        return Text(label, style="progress.data.speed")
+
+with Progress(
+    SpinnerColumn(),
+    "[progress.description]{task.description}",
+    BarColumn(),
+    TextColumn("{task.completed}/{task.total}"),
+    "[",
+    TimeElapsedColumn(),
+    "<",
+    TimeRemainingColumn(),
+    ',',
+    SpeedColumn(),
+    ']',
+    refresh_per_second=1, speed_estimate_period=360
+    ) as prog:
+    task1 = prog.add_task("[red]steps", total=args.num_steps)
+    task2 = prog.add_task("[red]updates", total=args.num_steps)
     for i_episode in itertools.count(0):
         print('1')
         episode_reward = 0
@@ -168,11 +208,12 @@ with tqdm(unit='steps', mininterval=5) as prog:
                     writer.add_scalar('entropy_temperature/alpha', alpha, updates)
 
                     updates += 1
+                    prog.update(task2, advance=1)
 
             next_state, reward, done, info = env.step(action)  # Step
             episode_steps += 1
             total_numsteps += 1
-            prog.update(1)
+            prog.update(task1, advance=1)
             episode_reward += reward
 
             # log env stuff
diff --git a/process_obs.py b/process_obs.py
index 60827e0..1f5ede3 100644
--- a/process_obs.py
+++ b/process_obs.py
@@ -46,26 +46,26 @@ class GenerativeResnet3Headless(nn.Module):
         self.res4 = ResidualBlock(channel_size * 4, channel_size * 4)
 
 
-        self.conv4 = nn.ConvTranspose2d(channel_size * 4, channel_size * 2, kernel_size=4, stride=2, padding=1,
-                                        output_padding=1)
-        self.bn4 = nn.BatchNorm2d(channel_size * 2)
+        # self.conv4 = nn.ConvTranspose2d(channel_size * 4, channel_size * 2, kernel_size=4, stride=2, padding=1,
+        #                                 output_padding=1)
+        # self.bn4 = nn.BatchNorm2d(channel_size * 2)
 
-        self.conv5 = nn.ConvTranspose2d(channel_size * 2, channel_size, kernel_size=4, stride=2, padding=2,
-                                        output_padding=1)
-        self.bn5 = nn.BatchNorm2d(channel_size)
+        # self.conv5 = nn.ConvTranspose2d(channel_size * 2, channel_size, kernel_size=4, stride=2, padding=2,
+        #                                 output_padding=1)
+        # self.bn5 = nn.BatchNorm2d(channel_size)
 
-        self.conv6 = nn.ConvTranspose2d(channel_size, channel_size, kernel_size=9, stride=1, padding=4)
+        # self.conv6 = nn.ConvTranspose2d(channel_size, channel_size, kernel_size=9, stride=1, padding=4)
 
-        self.pos_output = nn.Conv2d(in_channels=channel_size, out_channels=output_channels, kernel_size=2)
-        self.cos_output = nn.Conv2d(in_channels=channel_size, out_channels=output_channels, kernel_size=2)
-        self.sin_output = nn.Conv2d(in_channels=channel_size, out_channels=output_channels, kernel_size=2)
-        self.width_output = nn.Conv2d(in_channels=channel_size, out_channels=output_channels, kernel_size=2)
+        # self.pos_output = nn.Conv2d(in_channels=channel_size, out_channels=output_channels, kernel_size=2)
+        # self.cos_output = nn.Conv2d(in_channels=channel_size, out_channels=output_channels, kernel_size=2)
+        # self.sin_output = nn.Conv2d(in_channels=channel_size, out_channels=output_channels, kernel_size=2)
+        # self.width_output = nn.Conv2d(in_channels=channel_size, out_channels=output_channels, kernel_size=2)
 
-        self.dropout = dropout
-        self.dropout_pos = nn.Dropout(p=prob)
-        self.dropout_cos = nn.Dropout(p=prob)
-        self.dropout_sin = nn.Dropout(p=prob)
-        self.dropout_wid = nn.Dropout(p=prob)
+        # self.dropout = dropout
+        # self.dropout_pos = nn.Dropout(p=prob)
+        # self.dropout_cos = nn.Dropout(p=prob)
+        # self.dropout_sin = nn.Dropout(p=prob)
+        # self.dropout_wid = nn.Dropout(p=prob)
 
         # freeze above params
         for param in self.parameters():
@@ -122,12 +122,12 @@ class ProcessObservation(nn.Module):
             os.path.dirname(os.path.abspath(__file__)),
             'data/nets/cornell-randsplit-rgbd-grconvnet3-drop1-ch16/epoch_30_iou_0.97.pt'
         )
-        self.feature_extractor = GenerativeResnet3Headless().eval()
-        self.feature_extractor.load_state_dict(state_dict=torch.load(grconvnet3_path))
+        self.feature_extractor = GenerativeResnet3Headless()#.half()
+        self.feature_extractor.load_state_dict(state_dict=torch.load(grconvnet3_path), strict=False)
 
         old_img_size = (res[0], res[1], 8)
         new_img_size = (res[0]//16-1, res[1]//16-1, 8)
-        self.reduce_action_space = int(np.prod(old_img_size) - np.prod(new_img_size))
+        self.reduce_obs_space = int(np.prod(old_img_size) - np.prod(new_img_size))
     
     def __call__(self, obs):
         """
@@ -135,7 +135,7 @@ class ProcessObservation(nn.Module):
 
         This assumes the observations ends in 2 rgbd images with shape (224, 244, 4)
         """
-        # import pdb; pdb.set_trace()
+        assert obs.shape[-1] > self.res[0] * self.res[1] * 8
         h, w = self.res
         px = h * w
         base_rgbd = obs[:, -px * 4:].reshape((-1, h, w, 4))
diff --git a/replay_memory.py b/replay_memory.py
index 37d6580..2767e41 100644
--- a/replay_memory.py
+++ b/replay_memory.py
@@ -5,7 +5,23 @@ import hickle
 import os
 from loguru import logger
 
-class ReplayMemory:
+import lz4.frame
+import cloudpickle as pickle
+
+def pack(data):
+    """Pickle ``data`` and LZ4-frame-compress the result, returning the raw bytes."""
+    serialized = pickle.dumps(data)
+    compressed = lz4.frame.compress(serialized)
+    return compressed
+
+def unpack(data):
+    """LZ4-frame-decompress ``data`` and unpickle it; only pass trusted bytes (pickle)."""
+    serialized = lz4.frame.decompress(data)
+    restored = pickle.loads(serialized)
+    return restored
+
+
+class ReplayMemory2:
     def __init__(self, capacity, seed):
         random.seed(seed)
         self.capacity = capacity
@@ -15,11 +31,14 @@ class ReplayMemory:
     def push(self, state, action, reward, next_state, done):
         if len(self.buffer) < self.capacity:
             self.buffer.append(None)
-        self.buffer[self.position] = (state, action, reward, next_state, done)
+        batch = (state, action, reward, next_state, done)
+        # batch = pack(batch) # slow it down 10x
+        self.buffer[self.position] = batch
         self.position = (self.position + 1) % self.capacity
 
     def sample(self, batch_size):
         batch = random.sample(self.buffer, batch_size)
+        # batch = [unpack(d) for d in batch]
         state, action, reward, next_state, done = map(np.stack, zip(*batch))
         return state, action, reward, next_state, done
 
@@ -35,3 +54,77 @@ class ReplayMemory:
         if memory_path is not None:
             self.buffer = hickle.load(memory_path)
             self.position = len(self.buffer)
+
+
+class ReplayMemory:
+    """Fixed-capacity ring buffer of transitions held in preallocated numpy arrays."""
+
+    def __init__(self, capacity, seed, observation_dim, action_dim):
+        """Allocate ``capacity`` rows; observations are float16, terminals are uint8."""
+        random.seed(seed)
+        self.capacity = capacity
+        self._observations = np.zeros((capacity, observation_dim), dtype='float16')
+        self._actions = np.zeros((capacity, action_dim))
+        self._rewards = np.zeros((capacity, 1))
+        self._next_obs = np.zeros((capacity, observation_dim), dtype='float16')
+        self._terminals = np.zeros((capacity, 1), dtype='uint8')
+        self.position = 0  # next row to write; wraps back to 0 at capacity
+        self._size = 0  # rows written so far, capped at capacity
+
+    def push(self, state, action, reward, next_state, done):
+        """Store one transition at ``position``, overwriting the oldest row once full."""
+        self._observations[self.position] = state
+        self._actions[self.position] = action
+        self._rewards[self.position] = reward
+        self._next_obs[self.position] = next_state
+        self._terminals[self.position] = done
+        self.position = (self.position + 1) % self.capacity
+        self._size = min(self._size + 1, self.capacity)
+
+    def sample(self, batch_size):
+        """Draw ``batch_size`` stored rows with replacement; ValueError if nothing is stored."""
+        # Use _size, not position: position wraps to 0 when full and would hide valid rows.
+        indices = np.random.choice(self._size, size=batch_size)
+        return (self._observations[indices], self._actions[indices], self._rewards[indices],
+                self._next_obs[indices], self._terminals[indices])
+
+    def __len__(self):
+        return self._size
+
+
+# class BatchedReplayMemory:
+#     def __init__(self, capacity, seed, action_dim, observation_dim):
+#         random.seed(seed)
+#         self.capacity = capacity
+#         self._observations = np.zeros((capacity, observation_dim))
+#         self._actions = np.zeros((capacity, action_dim), dtype='float16')
+#         self._rewards = np.zeros((capacity, 1))
+#         self._next_obs = np.zeros((capacity, observation_dim), dtype='float16')
+#         self._terminals = np.zeros((capacity, 1), dtype='uint8')
+#         self.position = 0
+#         raise NotImplementedError()
+
+#     def push(self, state, action, reward, next_state, done):
+#         self._observations[self.position] = state
+#         self._actions[self.position] = action
+#         self._rewards[self.position] = reward
+#         self._next_obs[self.position] = next_state
+#         self._terminals[self.position] = done
+#         if self.position > self.capacity:
+#             # write to a dask capable file
+#         self.position = (self.position + 1) % self.capacity
+#         raise NotImplementedError()
+
+#     def sample(self, batch_size):
+#         # first choose a historic dask file, and this one
+#         # sample from both
+#         indices = np.random.choice(self._size, size=batch_size)
+#         state = self._observations[indices]
+#         action = self._actions[indices]
+#         reward = self._rewards[indices]
+#         next_state = self._next_obs[indices]
+#         done = self._terminals[indices]
+#         return state, action, reward, next_state, done
+
+#     def __len__(self):
+#         return len(self._observations)
diff --git a/sac.py b/sac.py
index 6d91743..3111da6 100644
--- a/sac.py
+++ b/sac.py
@@ -44,16 +44,20 @@ class SAC(object):
             self.alpha = 0
             self.automatic_entropy_tuning = False
             self.policy = DeterministicPolicy(num_inputs, action_space.shape[0], args.hidden_size, action_space).to(self.device)
-            self.policy_optim = Adam(self.policy.parameters(), lr=args.lr)
+            self.policy_optim = Adam(
+                list(self.policy.parameters()) + list(process_obs.parameters()),
+                lr=args.lr)
 
     def select_action(self, obs, evaluate=False):
-        obs = torch.FloatTensor(obs).to(self.device).unsqueeze(0)
-        state = self.process_obs(obs)
-        if evaluate is False:
-            action, _, _ = self.policy.sample(state)
-        else:
-            _, _, action = self.policy.sample(state)
-        return action.detach().cpu().numpy()[0]
+        with torch.no_grad():  # acting only: gradient tracking is disabled in this scope
+            obs = torch.FloatTensor(obs).to(self.device).unsqueeze(0)  # add a leading batch axis
+            state = self.process_obs(obs)  # observation -> policy input via process_obs
+            if evaluate is False:
+                action, _, _ = self.policy.sample(state)  # first output of policy.sample
+            else:
+                _, _, action = self.policy.sample(state)  # third output; presumably the deterministic action - confirm in policy.sample
+            action = action.detach().cpu().numpy()[0]  # to numpy on CPU, dropping the batch axis
+        return action
 
     def update_parameters(self, memory, batch_size, updates):
         # Sample a batch from memory