Merge pull request #92 from UoA-CARES/dvr_LR_move
LR move out: learning rates (and optimiser construction) are moved out of the individual network classes and into the algorithm classes.
beardyFace authored Oct 17, 2023
2 parents 0e56e68 + 5037c94 commit 2e56193
Showing 19 changed files with 93 additions and 99 deletions.
13 changes: 9 additions & 4 deletions cares_reinforcement_learning/algorithm/policy/DDPG.py
@@ -14,6 +14,8 @@ def __init__(self,
gamma,
tau,
action_num,
actor_lr,
critic_lr,
device
):

@@ -27,6 +29,9 @@ def __init__(self,
self.gamma = gamma
self.tau = tau

self.actor_net_optimiser = torch.optim.Adam(self.actor_net.parameters(), lr=actor_lr)
self.critic_net_optimiser = torch.optim.Adam(self.critic_net.parameters(), lr=critic_lr)

self.device = device

def select_action_from_policy(self, state, evaluation=None):
@@ -65,17 +70,17 @@ def train_policy(self, experiences):

# Update the Critic Network
critic_loss = F.mse_loss(q_values, q_target)
self.critic_net.optimiser.zero_grad()
self.critic_net_optimiser.zero_grad()
critic_loss.backward()
self.critic_net.optimiser.step()
self.critic_net_optimiser.step()

# Update the Actor Network
actor_q = self.critic_net(states, self.actor_net(states))
actor_loss = -actor_q.mean()

self.actor_net.optimiser.zero_grad()
self.actor_net_optimiser.zero_grad()
actor_loss.backward()
self.actor_net.optimiser.step()
self.actor_net_optimiser.step()

for target_param, param in zip(self.target_critic_net.parameters(), self.critic_net.parameters()):
target_param.data.copy_(param.data * self.tau + target_param.data * (1.0 - self.tau))
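With this change DDPG owns its optimisers: the constructor takes actor_lr and critic_lr and builds torch.optim.Adam instances in __init__, instead of reaching into self.actor_net.optimiser / self.critic_net.optimiser during training. The following is a minimal construction sketch of the new call signature; the actor_network/critic_network keyword names and all numeric values are illustrative assumptions, not taken from this diff.

# Hypothetical usage sketch of the new DDPG signature; network keyword names
# and numeric values are assumptions for illustration only.
from cares_reinforcement_learning.algorithm.policy.DDPG import DDPG
from cares_reinforcement_learning.networks.DDPG.Actor import Actor
from cares_reinforcement_learning.networks.DDPG.Critic import Critic

observation_size, action_num = 8, 2
actor = Actor(observation_size, action_num)    # no learning_rate argument any more
critic = Critic(observation_size, action_num)  # optimiser no longer built inside the network

agent = DDPG(
    actor_network=actor,
    critic_network=critic,
    gamma=0.99,
    tau=0.005,
    action_num=action_num,
    actor_lr=1e-4,    # consumed by torch.optim.Adam inside DDPG.__init__
    critic_lr=1e-3,
    device="cpu",
)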
13 changes: 9 additions & 4 deletions cares_reinforcement_learning/algorithm/policy/PPO.py
@@ -22,6 +22,8 @@ def __init__(self,
critic_network,
gamma,
action_num,
actor_lr,
critic_lr,
device):

self.type = "policy"
@@ -32,6 +34,9 @@ def __init__(self,
self.action_num = action_num
self.device = device

self.actor_net_optimiser = torch.optim.Adam(self.actor_net.parameters(), lr=actor_lr)
self.critic_net_optimiser = torch.optim.Adam(self.critic_net.parameters(), lr=critic_lr)

self.k = 10
self.eps_clip = 0.2
self.cov_var = torch.full(size=(action_num,), fill_value=0.5).to(self.device)
@@ -112,13 +117,13 @@ def train_policy(self, experience):
actor_loss = (-torch.minimum(surr1, surr2)).mean()
critic_loss = F.mse_loss(v, rtgs)

self.actor_net.optimiser.zero_grad()
self.actor_net_optimiser.zero_grad()
actor_loss.backward(retain_graph=True)
self.actor_net.optimiser.step()
self.actor_net_optimiser.step()

self.critic_net.optimiser.zero_grad()
self.critic_net_optimiser.zero_grad()
critic_loss.backward()
self.critic_net.optimiser.step()
self.critic_net_optimiser.step()

info['td_error'] = td_errors
info['actor_loss'] = actor_loss
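PPO follows the same pattern: both Adam optimisers are created in __init__ from actor_lr and critic_lr, and train_policy steps self.actor_net_optimiser and self.critic_net_optimiser. Below is a short construction sketch under the same assumptions about keyword names and values; note that the PPO Critic (further down in this PR) now takes only the observation size.

# Hypothetical PPO construction with the relocated learning rates;
# keyword names and values are illustrative assumptions.
from cares_reinforcement_learning.algorithm.policy.PPO import PPO
from cares_reinforcement_learning.networks.PPO.Actor import Actor
from cares_reinforcement_learning.networks.PPO.Critic import Critic

observation_size, action_num = 8, 2
agent = PPO(
    actor_network=Actor(observation_size, action_num),
    critic_network=Critic(observation_size),  # state-value critic, no action input
    gamma=0.99,
    action_num=action_num,
    actor_lr=1e-4,
    critic_lr=1e-3,
    device="cpu",
)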
13 changes: 9 additions & 4 deletions cares_reinforcement_learning/algorithm/policy/SAC.py
@@ -21,6 +21,8 @@ def __init__(self,
gamma,
tau,
action_num,
actor_lr,
critic_lr,
device):

self.type = "policy"
@@ -40,6 +42,9 @@ def __init__(self,
self.target_entropy = -action_num
# self.target_entropy = -torch.prod(torch.Tensor([action_num]).to(self.device)).item()

self.actor_net_optimiser = torch.optim.Adam(self.actor_net.parameters(), lr=actor_lr)
self.critic_net_optimiser = torch.optim.Adam(self.critic_net.parameters(), lr=critic_lr)

init_temperature = 0.01
self.log_alpha = torch.tensor(np.log(init_temperature)).to(device)
self.log_alpha.requires_grad = True
@@ -95,9 +100,9 @@ def train_policy(self, experiences):
critic_loss_total = critic_loss_one + critic_loss_two

# Update the Critic
self.critic_net.optimiser.zero_grad()
self.critic_net_optimiser.zero_grad()
critic_loss_total.backward()
self.critic_net.optimiser.step()
self.critic_net_optimiser.step()

pi, log_pi, _ = self.actor_net.sample(states)
qf1_pi, qf2_pi = self.critic_net(states, pi)
@@ -106,9 +111,9 @@
actor_loss = ((self.alpha * log_pi) - min_qf_pi).mean()

# Update the Actor
self.actor_net.optimiser.zero_grad()
self.actor_net_optimiser.zero_grad()
actor_loss.backward()
self.actor_net.optimiser.step()
self.actor_net_optimiser.step()

# update the temperature
alpha_loss = -(self.log_alpha * (log_pi + self.target_entropy).detach()).mean()
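SAC receives the same relocation for its actor and critic optimisers; in the hunks shown here the temperature machinery (log_alpha, initialised from init_temperature = 0.01) is untouched. The pattern applied across all of these policy algorithms is easiest to see in isolation, so here is a self-contained sketch with a dummy network and loss standing in for the real critic; it is not a verbatim excerpt from SAC.py.

# Self-contained sketch of the relocation pattern (dummy network and loss).
import torch
import torch.nn as nn

critic_net = nn.Linear(8 + 2, 1)   # stand-in for a Q-network over (state, action)
critic_lr = 1e-3                   # now a constructor argument of the algorithm

# Previously the network object carried self.optimiser and the algorithm
# called self.critic_net.optimiser; now the algorithm builds its own:
critic_net_optimiser = torch.optim.Adam(critic_net.parameters(), lr=critic_lr)

states_actions = torch.randn(32, 10)
critic_loss_total = critic_net(states_actions).pow(2).mean()

critic_net_optimiser.zero_grad()
critic_loss_total.backward()
critic_net_optimiser.step()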
14 changes: 10 additions & 4 deletions cares_reinforcement_learning/algorithm/policy/TD3.py
@@ -17,6 +17,8 @@ def __init__(self,
gamma,
tau,
action_num,
actor_lr,
critic_lr,
device):

self.type = "policy"
@@ -35,6 +37,10 @@ def __init__(self,
self.action_num = action_num
self.device = device

self.actor_net_optimiser = torch.optim.Adam(self.actor_net.parameters(), lr=actor_lr)
self.critic_net_optimiser = torch.optim.Adam(self.critic_net.parameters(), lr=critic_lr)


def select_action_from_policy(self, state, evaluation=False, noise_scale=0.1):
self.actor_net.eval()
with torch.no_grad():
@@ -87,9 +93,9 @@ def train_policy(self, experiences):
critic_loss_total = critic_loss_one + critic_loss_two

# Update the Critic
self.critic_net.optimiser.zero_grad()
self.critic_net_optimiser.zero_grad()
critic_loss_total.backward()
self.critic_net.optimiser.step()
self.critic_net_optimiser.step()

if self.learn_counter % self.policy_update_freq == 0:
# Update Actor
@@ -98,9 +104,9 @@
actor_q_values = torch.minimum(actor_q_one, actor_q_two)
actor_loss = -actor_q_values.mean()

self.actor_net.optimiser.zero_grad()
self.actor_net_optimiser.zero_grad()
actor_loss.backward()
self.actor_net.optimiser.step()
self.actor_net_optimiser.step()

# Update target network params
for target_param, param in zip(self.target_critic_net.Q1.parameters(), self.critic_net.Q1.parameters()):
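TD3 gets the same treatment, with the extra wrinkle that the actor optimiser is stepped only every policy_update_freq critic updates; the delayed-update logic itself is unchanged, only the optimiser handles it uses are renamed. Below is a self-contained sketch of that interaction using dummy single-layer networks; the policy_update_freq value of 2 is an assumed example.

# Sketch of TD3-style delayed actor updates using the relocated optimisers.
import torch
import torch.nn as nn

actor_net = nn.Linear(8, 2)
critic_net = nn.Linear(8 + 2, 1)
actor_net_optimiser = torch.optim.Adam(actor_net.parameters(), lr=1e-4)
critic_net_optimiser = torch.optim.Adam(critic_net.parameters(), lr=1e-3)

policy_update_freq = 2
for learn_counter in range(1, 5):
    states = torch.randn(32, 8)

    # Critic update on every step.
    critic_input = torch.cat([states, actor_net(states).detach()], dim=1)
    critic_loss_total = critic_net(critic_input).pow(2).mean()
    critic_net_optimiser.zero_grad()
    critic_loss_total.backward()
    critic_net_optimiser.step()

    # Actor update only on every policy_update_freq-th step.
    if learn_counter % policy_update_freq == 0:
        actor_loss = -critic_net(torch.cat([states, actor_net(states)], dim=1)).mean()
        actor_net_optimiser.zero_grad()
        actor_loss.backward()
        actor_net_optimiser.step()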
8 changes: 6 additions & 2 deletions cares_reinforcement_learning/algorithm/value/DQN.py
@@ -10,12 +10,16 @@ class DQN:
def __init__(self,
network,
gamma,
network_lr,
device):

self.type = "value"
self.network = network.to(device)
self.device = device
self.gamma = gamma

self.network_optimiser = torch.optim.Adam(self.network.parameters(), lr=network_lr)

def select_action_from_policy(self, state):
self.network.eval()
with torch.no_grad():
@@ -48,9 +52,9 @@ def train_policy(self, experiences):

# Update the Network
loss = F.mse_loss(best_q_values, q_target)
self.network.optimiser.zero_grad()
self.network_optimiser.zero_grad()
loss.backward()
self.network.optimiser.step()
self.network_optimiser.step()

info['q_target'] = q_target
info['q_values_min'] = best_q_values
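The value-based agents need only one optimiser, so DQN takes a single network_lr. The sketch below shows construction and action selection; the keyword names match the parameters visible above, while the numeric values are illustrative.

# Hypothetical DQN usage with the relocated learning rate.
from cares_reinforcement_learning.algorithm.value.DQN import DQN
from cares_reinforcement_learning.networks.DQN.network import Network

observation_size, action_num = 4, 2
agent = DQN(
    network=Network(observation_size, action_num),  # learning_rate argument removed
    gamma=0.99,
    network_lr=1e-3,  # used for torch.optim.Adam(self.network.parameters(), ...)
    device="cpu",
)
action = agent.select_action_from_policy([0.1, 0.2, 0.3, 0.4])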
13 changes: 9 additions & 4 deletions cares_reinforcement_learning/algorithm/value/DoubleDQN.py
@@ -16,6 +16,7 @@ def __init__(self,
network,
gamma,
tau,
network_lr,
device):

self.type = "value"
@@ -26,14 +27,17 @@ def __init__(self,
self.tau = tau
self.device = device

self.network_optimiser = torch.optim.Adam(self.network.parameters(), lr=network_lr)


def select_action_from_policy(self, state):
self.actor_net.eval()
self.network.eval()
with torch.no_grad():
state_tensor = torch.FloatTensor(state).to(self.device)
state_tensor = state_tensor.unsqueeze(0)
q_values = self.network(state_tensor)
action = torch.argmax(q_values).item()
self.actor_net.train()
self.network.train()
return action

def train_policy(self, experiences):
@@ -57,9 +61,10 @@
q_target = rewards + self.gamma * (1 - dones) * next_q_value

loss = F.mse_loss(q_value, q_target)
self.network.optimiser.zero_grad()

self.network_optimiser.zero_grad()
loss.backward()
self.network.optimiser.step()
self.network_optimiser.step()

for target_param, param in zip(self.target_network.parameters(), self.network.parameters()):
target_param.data.copy_(param.data * self.tau + target_param.data * (1.0 - self.tau))
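DoubleDQN adds tau for the soft target-network update but otherwise mirrors DQN; the hunk above also switches select_action_from_policy from self.actor_net to self.network for its eval()/train() calls. A construction sketch follows, assuming the class is named DoubleDQN after its module; values are illustrative.

# Hypothetical DoubleDQN construction with the relocated learning rate.
from cares_reinforcement_learning.algorithm.value.DoubleDQN import DoubleDQN
from cares_reinforcement_learning.networks.DoubleDQN.network import Network

observation_size, action_num = 4, 2
agent = DoubleDQN(
    network=Network(observation_size, action_num),
    gamma=0.99,
    tau=0.005,        # target-network soft-update rate
    network_lr=1e-3,
    device="cpu",
)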
4 changes: 1 addition & 3 deletions cares_reinforcement_learning/networks/DDPG/Actor.py
@@ -1,10 +1,9 @@
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class Actor(nn.Module):
def __init__(self, observation_size, num_actions, learning_rate):
def __init__(self, observation_size, num_actions):
super(Actor, self).__init__()

self.hidden_size = [1024, 1024]
@@ -13,7 +12,6 @@ def __init__(self, observation_size, num_actions, learning_rate):
self.h_linear_2 = nn.Linear(in_features=self.hidden_size[0], out_features=self.hidden_size[1])
self.h_linear_3 = nn.Linear(in_features=self.hidden_size[1], out_features=num_actions)

self.optimiser = optim.Adam(self.parameters(), lr=learning_rate)

def forward(self, state):
x = F.relu(self.h_linear_1(state))
4 changes: 1 addition & 3 deletions cares_reinforcement_learning/networks/DDPG/Critic.py
@@ -1,11 +1,10 @@
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


class Critic(nn.Module):
def __init__(self, observation_size, num_actions, learning_rate):
def __init__(self, observation_size, num_actions):
super(Critic, self).__init__()

self.hidden_size = [1024, 1024]
@@ -15,7 +14,6 @@ def __init__(self, observation_size, num_actions, learning_rate):
self.h_linear_2 = nn.Linear(self.hidden_size[0], self.hidden_size[1])
self.h_linear_3 = nn.Linear(self.hidden_size[1], 1)

self.optimiser = optim.Adam(self.parameters(), lr=learning_rate)

def forward(self, state, action):
obs_action = torch.cat([state, action], dim=1)
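On the network side the change is symmetrical: Actor and Critic become plain nn.Modules with no optim import and no self.optimiser attribute, so any caller that trains them builds the optimiser externally. Here is a sketch of that division of responsibility using the DDPG networks; the learning-rate values are illustrative.

# The networks no longer construct their own Adam; the caller does.
import torch
from cares_reinforcement_learning.networks.DDPG.Actor import Actor
from cares_reinforcement_learning.networks.DDPG.Critic import Critic

actor = Actor(observation_size=8, num_actions=2)
critic = Critic(observation_size=8, num_actions=2)

# Previously these lived on the modules as actor.optimiser / critic.optimiser.
actor_optimiser = torch.optim.Adam(actor.parameters(), lr=1e-4)
critic_optimiser = torch.optim.Adam(critic.parameters(), lr=1e-3)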
7 changes: 2 additions & 5 deletions cares_reinforcement_learning/networks/DQN/network.py
@@ -1,10 +1,9 @@
import torch

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class Network(nn.Module):
def __init__(self, observation_size, num_actions, learning_rate):
def __init__(self, observation_size, num_actions):
super(Network, self).__init__()

self.hidden_size = [1024, 1024]
@@ -13,8 +12,6 @@ def __init__(self, observation_size, num_actions, learning_rate):
self.h_linear_2 = nn.Linear(in_features=self.hidden_size[0], out_features=self.hidden_size[1])
self.h_linear_3 = nn.Linear(in_features=self.hidden_size[1], out_features=num_actions)

self.optimiser = optim.Adam(self.parameters(), lr=learning_rate)

def forward(self, state):
x = F.relu(self.h_linear_1(state))
x = F.relu(self.h_linear_2(x))
7 changes: 2 additions & 5 deletions cares_reinforcement_learning/networks/DoubleDQN/network.py
@@ -1,10 +1,9 @@
import torch

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class Network(nn.Module):
def __init__(self, observation_size, num_actions, learning_rate):
def __init__(self, observation_size, num_actions):
super(Network, self).__init__()

self.hidden_size = [1024, 1024]
@@ -13,8 +12,6 @@ def __init__(self, observation_size, num_actions, learning_rate):
self.h_linear_2 = nn.Linear(in_features=self.hidden_size[0], out_features=self.hidden_size[1])
self.h_linear_3 = nn.Linear(in_features=self.hidden_size[1], out_features=num_actions)

self.optimiser = optim.Adam(self.parameters(), lr=learning_rate)

def forward(self, state):
x = F.relu(self.h_linear_1(state))
x = F.relu(self.h_linear_2(x))
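Both Q-network classes shrink to pure function approximators that can be instantiated and queried without any training configuration. A quick forward-pass sketch for the DQN Network follows, assuming forward returns the output of its final linear layer (one value per action).

# Forward pass through the slimmed-down Network; no learning rate involved.
import torch
from cares_reinforcement_learning.networks.DQN.network import Network

net = Network(observation_size=4, num_actions=2)
q_values = net(torch.randn(1, 4))  # expected shape: (1, 2), one Q-value per action
print(q_values.shape)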
7 changes: 3 additions & 4 deletions cares_reinforcement_learning/networks/DuelingDQN/Network.py
@@ -1,10 +1,10 @@
import torch

import torch.nn as nn
import torch.optim as optim


class DuelingNetwork(nn.Module):

def __init__(self, observation_space_size, action_num, learning_rate):
def __init__(self, observation_space_size, action_num):
super(DuelingNetwork, self).__init__()
self.input_dim = observation_space_size
self.output_dim = action_num
@@ -28,7 +28,6 @@ def __init__(self, observation_space_size, action_num, learning_rate):
nn.Linear(1024, self.output_dim)
)

self.optimiser = optim.Adam(self.parameters(), lr=learning_rate)

def forward(self, state):
features = self.feature_layer(state)
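DuelingDQN's network loses its embedded optimiser in the same way, so its training code (not among the loaded diffs) would now be expected to build one externally. A short sketch using the class and argument names from the hunk above, with the learning rate purely illustrative:

# External optimiser for the dueling network; the class no longer owns one.
import torch
from cares_reinforcement_learning.networks.DuelingDQN.Network import DuelingNetwork

net = DuelingNetwork(observation_space_size=4, action_num=2)
optimiser = torch.optim.Adam(net.parameters(), lr=1e-3)  # previously created inside __init__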
4 changes: 1 addition & 3 deletions cares_reinforcement_learning/networks/PPO/Actor.py
@@ -1,11 +1,10 @@
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F


class Actor(nn.Module):
def __init__(self, observation_size, num_actions, learning_rate):
def __init__(self, observation_size, num_actions):
super(Actor, self).__init__()

self.hidden_size = [1024, 1024]
@@ -14,7 +13,6 @@ def __init__(self, observation_size, num_actions, learning_rate):
self.h_linear_2 = nn.Linear(in_features=self.hidden_size[0], out_features=self.hidden_size[1])
self.h_linear_3 = nn.Linear(in_features=self.hidden_size[1], out_features=num_actions)

self.optimiser = optim.Adam(self.parameters(), lr=learning_rate)

def forward(self, state):
x = F.relu(self.h_linear_1(state))
6 changes: 2 additions & 4 deletions cares_reinforcement_learning/networks/PPO/Critic.py
@@ -1,10 +1,9 @@
import torch

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class Critic(nn.Module):
def __init__(self, observation_size, learning_rate):
def __init__(self, observation_size):
super(Critic, self).__init__()

self.hidden_size = [1024, 1024]
@@ -14,7 +13,6 @@ def __init__(self, observation_size, learning_rate):
self.h_linear_2 = nn.Linear(self.hidden_size[0], self.hidden_size[1])
self.h_linear_3 = nn.Linear(self.hidden_size[1], 1)

self.optimiser = optim.Adam(self.parameters(), lr=learning_rate)

def forward(self, state):
q1 = F.relu(self.h_linear_1(state))
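Of the network files shown here, the PPO Critic is the only one whose constructor drops to a single argument, since it estimates a state value rather than a state-action value. A final sketch of the updated PPO network signatures; values are illustrative and the output shape is assumed from the layer sizes.

# PPO networks after the change: neither takes a learning rate.
import torch
from cares_reinforcement_learning.networks.PPO.Actor import Actor
from cares_reinforcement_learning.networks.PPO.Critic import Critic

actor = Actor(observation_size=8, num_actions=2)
critic = Critic(observation_size=8)  # state-value head, no action input

value = critic(torch.randn(1, 8))    # V(s) estimate, expected shape (1, 1)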
(Diffs for the remaining 6 changed files were not loaded.)
