In [12]:
import torch
from torch import Tensor, nn
import matplotlib.pyplot as plt
import numpy as np
Normalising for NLLLoss
Divide the NLL by log(N), where N is the number of classes (the size of the choice set). A uniform prediction over N classes scores an NLL of exactly log(N), so the normalised loss falls roughly in [0, 1]. Note that log(1) = 0, so N = 1 produces nan below.
In [13]:
for n in range(1, 9):
    o = torch.rand(128, n)  # fake logits: batch of 128, n classes
    p = nn.Softmax(-1)(o)
    lp = nn.LogSoftmax(-1)(o)
    norm = np.log(n)  # log(1) == 0, so n == 1 yields nan below
    labels = torch.argmax(p, -1)  # use the model's own argmax as the target
    loss = nn.NLLLoss()(lp, labels)
    nloss = loss / norm
    print(loss, nloss)
tensor(0.) tensor(nan)
tensor(0.5516) tensor(0.7958)
tensor(0.8900) tensor(0.8101)
tensor(1.1101) tensor(0.8008)
tensor(1.2883) tensor(0.8004)
tensor(1.4584) tensor(0.8140)
tensor(1.6098) tensor(0.8273)
tensor(1.7186) tensor(0.8265)
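A quick check of the bound (a sketch, not one of the original cells): a uniform prediction over N classes attains exactly log(N), since -log(1/N) = log(N).

n = 4
uniform_lp = torch.full((1, n), 1.0 / n).log()  # log-probs of a uniform prediction
print(nn.NLLLoss()(uniform_lp, torch.tensor([0])), np.log(n))  # both ≈ 1.3863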
Normalising for KLD
Divide the KLD by n, the size of the latent dimension.
In [19]:
def kld(mu: Tensor, log_var: Tensor) -> Tensor:
    # KL divergence of a diagonal Gaussian N(mu, sigma^2) from N(0, 1),
    # summed over the latent dimensions, then averaged over the batch.
    return torch.mean(
        -0.5 * torch.sum(1 + log_var - mu**2 - log_var.exp(), dim=1), dim=0
    )
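Per latent dimension i, the closed-form KLD against the standard normal is -0.5 * (1 + log sigma_i^2 - mu_i^2 - sigma_i^2). The sum over dim=1 runs over all n dimensions, so the total grows linearly with n, and dividing by n gives a per-dimension value. A quick check of that linearity (my own sketch, not from the notes): with mu = 1 and log_var = 0 everywhere, each dimension contributes exactly 0.5 to the sum.

mu = torch.ones(1, 8)
log_var = torch.zeros(1, 8)
print(kld(mu, log_var))  # tensor(4.) == 0.5 * 8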
In [22]:
for n in range(1, 9):
    fake = torch.rand(128, 128, n)  # 128 batches of 128 samples, n latent dims
    mu = fake.mean(dim=0)
    log_var = torch.log(fake.var(dim=0))
    loss = kld(mu, log_var)
    nloss = loss / n  # per-dimension KLD
    print(loss, nloss)
tensor(0.9069) tensor(0.9069)
tensor(1.8243) tensor(0.9121)
tensor(2.7375) tensor(0.9125)
tensor(3.6475) tensor(0.9119)
tensor(4.5428) tensor(0.9086)
tensor(5.4654) tensor(0.9109)
tensor(6.3581) tensor(0.9083)
tensor(7.3023) tensor(0.9128)
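Putting the two together (a minimal sketch; normalised_loss and the beta weighting are my own illustration, not part of these notes): rescaling both terms to per-unit values keeps their relative weight comparable across choice sizes and latent widths.

def normalised_loss(log_probs: Tensor, labels: Tensor,
                    mu: Tensor, log_var: Tensor, beta: float = 1.0) -> Tensor:
    # Hypothetical helper combining both normalisations; assumes kld() from above.
    n_classes = log_probs.shape[-1]  # must be > 1, since log(1) == 0
    n_latent = mu.shape[-1]
    nll = nn.NLLLoss()(log_probs, labels) / np.log(n_classes)
    return nll + beta * kld(mu, log_var) / n_latent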