Data Science
Asked by Farhang Amaji on July 8, 2021
I want to train a neural network and check its total training loss every 100 epochs, but I get `RuntimeError: CUDA out of memory. Tried to allocate ...` at the line marked `#1` below. I have seen this error before, but this time I think it may be genuine, because I am using a large number of neurons in the hidden layers. I have already tried

```python
torch.cuda.empty_cache()
import gc
gc.collect()
```

which are the usual recommendations for this error. So my question is not how to fix the error; as I said, this time it may simply be correct (I am not even trying to check whether the 3 GB of GPU memory is actually full). What I want to know is: is there another way to calculate the loss over all the training data, for example by using the losses of the individual batches? In other words, how can I compute the total loss from the batch losses? (See the sketch after my code below for the kind of thing I mean.)

By the way, any suggestions about my hidden layer sizes are also appreciated, because I cannot even overfit my 400,000-sample training set.
```python
import time

import torch
import torch.nn as nn
import keyboard  # used to stop training with the 'q' key

# trainx and trainy are my training tensors (about 400,000 rows), loaded earlier on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

input_size = 83
skipcolumns = 3
hidden_size = 640
output_size = 12
num_epochs = 10000
batch_size = 20000
learning_rate = .001
n_total_steps = int(len(trainx) / batch_size) + 1

def seperator(i, divide, p):
    # start and end indices of the i-th of `divide` slices of the range p = [start, end]
    return int((p[1] - p[0]) / divide * i + p[0]), int((p[1] - p[0]) / divide * (i + 1) + p[0])

class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NeuralNet, self).__init__()
        self.input_size = input_size
        self.lin = nn.Linear(input_size, hidden_size)
        self.relu = torch.nn.ReLU()
        self.leakyrelu = nn.LeakyReLU(negative_slope=0.01, inplace=False)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, hidden_size)
        self.l4 = nn.Linear(hidden_size, hidden_size)
        self.l5 = nn.Linear(hidden_size, hidden_size)
        self.l6 = nn.Linear(hidden_size, hidden_size)
        self.lout = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # out = self.relu(out)
        # out = torch.sigmoid(out)
        out = self.lin(x)
        out = self.leakyrelu(out)
        out = self.l2(out)
        out = self.leakyrelu(out)
        out = self.l3(out)
        out = self.leakyrelu(out)
        # out = self.l4(out)
        # out = self.leakyrelu(out)
        # out = self.l5(out)
        # out = self.leakyrelu(out)
        # out = self.l6(out)
        # out = self.leakyrelu(out)
        out = self.lout(out)
        # no activation and no softmax at the end
        return out

# Loss and optimizer
criterion = nn.MSELoss()
model = NeuralNet(input_size, hidden_size, output_size).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

tti = time.time()
for epoch in range(num_epochs):
    for btch in range(n_total_steps):
        stind, endind = seperator(btch, n_total_steps, [0, len(trainx)])
        thisx = trainx[stind:endind].to(device)
        thisy = trainy[stind:endind].to(device)
        outputs = model(thisx)
        loss = criterion(outputs, thisy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if keyboard.is_pressed('q'):
        x = 1 / 0  # deliberate ZeroDivisionError, just to break out of training by hand
    if epoch % 10 == 0:
        print(time.time() - tti)
    if epoch % 100 == 0:
        thisx = trainx.to(device)
        thisy = trainy.to(device)
        outputs = model(thisx)  # 1  <- the line that raises CUDA out of memory
        loss = criterion(outputs, thisy)
        print(loss.item())
```
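To make the second question more concrete, here is a rough sketch of what I have in mind: evaluating the training loss batch by batch under `torch.no_grad()` and combining the per-batch MSE values by weighting each one by its batch size. The helper name `evaluate_total_loss` is just a placeholder I made up (it reuses `seperator`, `trainx`, `trainy`, `criterion`, `n_total_steps` and `device` from my code above); I am not sure this weighting is the right way to recover the exact full-dataset loss, so corrections are welcome:

```python
def evaluate_total_loss(model, trainx, trainy, criterion, n_total_steps, device):
    # Sketch of my idea (my own helper, not from any library): compute the loss over the
    # whole training set without one giant forward pass, by reusing the same batch
    # slicing as in training and averaging the per-batch losses weighted by batch size.
    model.eval()
    total_loss = 0.0
    total_samples = 0
    with torch.no_grad():  # no gradients are stored, so far less GPU memory is used
        for btch in range(n_total_steps):
            stind, endind = seperator(btch, n_total_steps, [0, len(trainx)])
            thisx = trainx[stind:endind].to(device)
            thisy = trainy[stind:endind].to(device)
            outputs = model(thisx)
            batch_loss = criterion(outputs, thisy)   # mean MSE over this batch
            n = thisx.shape[0]
            total_loss += batch_loss.item() * n      # weight by number of samples in the batch
            total_samples += n
    model.train()
    return total_loss / total_samples                # mean MSE over all training samples

# usage inside the training loop, replacing the full-dataset forward pass at #1:
# if epoch % 100 == 0:
#     print(evaluate_total_loss(model, trainx, trainy, criterion, n_total_steps, device))
```

Since `nn.MSELoss()` averages over every element of a batch and my output size is the same for every batch, I think weighting each batch mean by the number of samples and dividing by the total should give the same number as one forward pass over the whole training set, but please correct me if that reasoning is wrong.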