Do I have to pass the number of speakers to a speaker-recognition model? If I do, will it cause problems when testing?
I have a model built from an encoder and a discriminator (not exactly a GAN, though). Here are the fragments that use the number of speakers:
class Model(nn.Module):
def __init__(self, num_speakers):
    """Create the discriminator, encoder and classifier sub-modules.

    Args:
        num_speakers: forwarded unchanged to ``Classifier``; it is the only
            sub-module constructed with an argument here.
    """
    super().__init__()
    # Construction order is kept (discriminator, encoder, classifier) so that
    # parameter initialisation consumes the RNG in the same sequence.
    self.discriminator = Discriminator()
    self.encoder = Encoder()
    self.classifier = Classifier(num_speakers)
def forward(self, input, speakers):
print("input size", input.shape)
input = input.reshape((BATCHES * 3, 1, 3200))
embeddings = self.encoder(input.to("cuda:0")) # TODO
speakers_probs = self.classifier(embeddings.to("cuda:0"))
embeddings = embeddings.reshape((BATCHES, 3, constants.embedding_size))
....
class Classifier(nn.Module):
    """Classification head mapping an embedding to per-speaker scores.

    Consists of a hidden linear layer that keeps the embedding width
    (``constants.embedding_size``) followed by a projection to
    ``num_speakers`` outputs.
    """

    def __init__(self, num_speakers):
        """Build both linear layers.

        Args:
            num_speakers: output width of the final projection layer.
        """
        super().__init__()
        width = constants.embedding_size
        self.fc1 = nn.Linear(width, width)  # inner (hidden) layer
        self.proj = nn.Linear(width, num_speakers)

    def encode(self, x):
        """Return the ReLU-activated output of the hidden layer for ``x``."""
        hidden = self.fc1(x)
        return Fun.relu(hidden)

    def forward(self, x):
        """Return the softmax, taken along dim 1, of the projected hidden features."""
        hidden = self.encode(x)  # hidden representation
        scores = self.proj(hidden)
        return Fun.softmax(scores, dim=1)
...
class Discriminator(nn.Module):
    """Two-layer network producing one sigmoid-activated score per input row.

    The input width is ``2 * constants.embedding_size``.
    NOTE(review): presumably two embeddings concatenated; confirm against the
    caller, which is not shown in this fragment.
    """

    def __init__(self):
        """Build the hidden layer and the single-output projection."""
        super().__init__()
        width = 2 * constants.embedding_size
        self.fc1 = nn.Linear(width, width)
        self.proj = nn.Linear(width, 1)

    def forward(self, x):
        """Apply hidden layer + ReLU, project to one value, then sigmoid."""
        hidden = Fun.relu(self.fc1(x))
        score = self.proj(hidden)
        return torch.sigmoid(score)
It feels awkward to hard-code the number of speakers in main as model = Model(866), and I am not sure whether this will cause problems when I use the model for testing: will it still recognize a person when the number of speakers is 1?
source https://stackoverflow.com/questions/73338868/do-i-have-to-input-to-a-model-for-the-speaker-recognition-the-amount-of-speakers
Comments
Post a Comment