Hello everybody,
First of all, I would like to thank you for creating this great tool and the detailed tutorials!
Unfortunately, I keep running into problems when trying to create my own models: I get various error messages when I try to train them.
As an example, I am trying to build a VAE that differs slightly from the one used by scvi-tools.
First, I create a custom decoder:
from typing import Iterable
import torch
from scvi.nn import FCLayers
from torch import nn as nn
# Decoder
class MyDecoder(nn.Module):
"""
    Decodes data from latent space to data space,
    i.e. from ``n_input`` dimensions to ``n_output``
    dimensions, using a fully-connected neural network with ``n_layers`` hidden layers of ``n_hidden`` nodes each.
    The output is a single decoded tensor (a linear transform of the last hidden layer).
Parameters
----------
n_input
The dimensionality of the input (latent space)
n_output
The dimensionality of the output (data space)
n_cat_list
A list containing the number of categories
for each category of interest. Each category will be
included using a one-hot encoding
n_layers
The number of fully-connected hidden layers
n_hidden
The number of nodes per hidden layer
dropout_rate
Dropout rate to apply to each of the hidden layers
kwargs
        Keyword args for :class:`~scvi.nn.FCLayers`
"""
def __init__(
self,
n_input: int,
n_output: int,
n_cat_list: Iterable[int] = None,
n_layers: int = 1,
n_hidden: int = 128,
dropout_rate: float = 0.2,
**kwargs,
):
super().__init__()
self.decoder = FCLayers(
n_in=n_input,
n_out=n_hidden,
n_cat_list=n_cat_list,
n_layers=n_layers,
n_hidden=n_hidden,
dropout_rate=dropout_rate,
**kwargs,
)
self.linear_out = nn.Linear(n_hidden, n_output)
def forward(self, x: torch.Tensor, *cat_list: int):
"""
The forward computation for a single sample.
#. Decodes the data from the latent space using the decoder network
        #. Returns a tensor with the decoded values
Parameters
----------
x
tensor with shape ``(n_input,)``
cat_list
list of category membership(s) for this sample
Returns
-------
        :py:class:`torch.Tensor`
            Decoded tensor of shape ``(n_output,)``
"""
p = self.linear_out(self.decoder(x, *cat_list))
return p
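Calling this decoder on its own with random tensors behaves as I would expect; this is just a quick sanity check I run (the shapes are made up):
# Standalone check of MyDecoder (not part of the model):
# 8 random latent vectors of size 10 should decode to 8 vectors of size 100.
import torch

dec = MyDecoder(n_input=10, n_output=100)
z = torch.randn(8, 10)
out = dec(z)
print(out.shape)  # torch.Size([8, 100])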
After that, I define the VAE module:
import numpy as np
import torch
from torch.distributions import Normal, NegativeBinomial
from torch.distributions import kl_divergence as kl
from scvi.nn import Decoder, Encoder
from scvi import _CONSTANTS
from scvi.module.base import (
BaseModuleClass,
LossRecorder,
auto_move_data,
)
class MyModule(BaseModuleClass):
"""
Parameters
----------
    n_input
        Number of input genes
    n_hidden
        Number of nodes per hidden layer
    n_latent
        Dimensionality of the latent space
    n_layers
        Number of hidden layers in the encoder and decoder networks
    dropout_rate
        Dropout rate for the encoder and decoder networks
    kl_weight
        Weight of the KL divergence term in the loss
"""
def __init__(
self,
n_input: int,
n_hidden: int = 800,
n_latent: int = 10,
n_layers: int = 2,
dropout_rate: float = 0.1,
kl_weight: float = 0.00005,
):
super().__init__()
# in the init, we create the parameters of our elementary stochastic computation unit.
# First, we setup the parameters of the generative model
self.n_layers = n_layers
self.n_latent = n_latent
self.kl_weight = kl_weight
        self.decoder = MyDecoder(
            n_latent,
            n_input,
            n_layers=n_layers,
            n_hidden=n_hidden,
            dropout_rate=dropout_rate,
        )
        # Second, we setup the parameters of the variational distribution
        self.z_encoder = Encoder(
            n_input,
            n_latent,
            n_layers=n_layers,
            n_hidden=n_hidden,
            dropout_rate=dropout_rate,
        )
def _get_inference_input(self, tensors):
"""Parse the dictionary to get appropriate args"""
# let us fetch the raw counts, and add them to the dictionary
x = tensors[_CONSTANTS.X_KEY]
input_dict = dict(x=x)
return input_dict
@auto_move_data
def inference(self, x):
"""
High level inference method.
Runs the inference (encoder) model.
"""
qz_m, qz_v, z = self.z_encoder(x)
outputs = dict(qz_m=qz_m, qz_v=qz_v, z=z)
return outputs
def _get_generative_input(self, tensors, inference_outputs):
z = inference_outputs["z"]
input_dict = {
"z": z
}
return input_dict
@auto_move_data
def generative(self, z):
"""Runs the generative model."""
px = self.decoder(z)
return dict(px=px)
def loss(
self,
tensors,
inference_outputs,
generative_outputs,
):
x = tensors[_CONSTANTS.X_KEY]
qz_m = inference_outputs["qz_m"]
qz_v = inference_outputs["qz_v"]
p = generative_outputs["px"]
kld = kl(
Normal(qz_m, torch.sqrt(qz_v)),
Normal(0, 1),
).sum(dim=1)
        rl = self.get_reconstruction_loss(x, p)
loss = (0.5 * rl + 0.5 * (kld * self.kl_weight)).mean()
kl_global = torch.randn(1)
return LossRecorder(loss, rl, kld, kl_global)
def get_reconstruction_loss(self, x, px) -> torch.Tensor:
loss = ((x - px) ** 2).sum(dim=1)
return loss
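When I exercise the module by hand on random data, inference, generative, and loss all seem to run through; this is only a shape check with meaningless fake counts:
# Manual shape check of MyModule on fake counts (values are meaningless).
import torch
from scvi import _CONSTANTS

m = MyModule(n_input=100)
x = torch.randint(0, 20, (8, 100)).float()          # 8 fake cells, 100 fake genes
inf = m.inference(x)                                 # qz_m, qz_v, z
gen = m.generative(inf["z"])                         # px
losses = m.loss({_CONSTANTS.X_KEY: x}, inf, gen)     # LossRecorder
print(inf["z"].shape, gen["px"].shape, losses.loss)  # torch.Size([8, 10]) torch.Size([8, 100]) tensor(...)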
Then I create the model class:
import numpy as np
import pandas as pd
import scanpy as sc
import torch
from anndata import AnnData
from scvi.module import VAE
from scvi.data import setup_anndata
from scvi.model.base import BaseModelClass, UnsupervisedTrainingMixin, VAEMixin
class Try1(UnsupervisedTrainingMixin, BaseModelClass, VAEMixin):
"""
single-cell Variational Inference [Lopez18]_.
"""
def __init__(
self,
adata: AnnData,
n_latent: int = 10,
n_hidden: int = 800,
n_layers: int = 2,
dropout_rate: float = 0.1,
**model_kwargs,
):
super(Try1, self).__init__(adata)
self.adata = adata
self.module = MyModule(
n_input=self.summary_stats["n_vars"],
#n_batch=self.summary_stats["n_batch"],
n_latent=n_latent,
n_hidden=n_hidden,
n_layers=n_layers,
dropout_rate=dropout_rate,
**model_kwargs,
)
self._model_summary_string = (
"VAE Model with the following params: \nn_hidden: {}, \nn_latent: {}, n_layers: {}, dropout_rate: {}"
).format(
n_hidden,
n_latent,
n_layers,
dropout_rate,
)
self.init_params_ = self._get_init_params(locals())
Then I try to train it on the pbmc5k data that I prepared beforehand:
pbmc5k_ready = pbmc5k_ready.copy()
scvi.data.setup_anndata(pbmc5k_ready)
model = Try1(pbmc5k_ready)
model.train(2)
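For what it is worth, when I call the wired-up module by hand on a small batch outside of the trainer, the forward pass itself seems to go through (sketch below; I am assuming pbmc5k_ready.X holds the raw counts):
# Manual smoke test of model.module outside the trainer
# (assumes pbmc5k_ready.X contains the raw counts).
import numpy as np
import torch
from scipy import sparse
from scvi import _CONSTANTS

X = pbmc5k_ready.X[:8]
X = X.toarray() if sparse.issparse(X) else np.asarray(X)
batch = {_CONSTANTS.X_KEY: torch.tensor(X, dtype=torch.float32)}
inference_outputs, generative_outputs, losses = model.module(batch)
print(losses.loss, losses.reconstruction_loss.shape)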
The call to model.train(2), however, unfortunately fails with this error message:
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
Epoch 1/2: 0%| | 0/2 [00:00<?, ?it/s]
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-133-37babc234801> in <module>()
----> 1 model.train(2)
/usr/local/lib/python3.7/dist-packages/scvi/model/base/_training_mixin.py in train(self, max_epochs, use_gpu, train_size, validation_size, batch_size, early_stopping, plan_kwargs, **trainer_kwargs)
75 **trainer_kwargs,
76 )
---> 77 return runner()
/usr/local/lib/python3.7/dist-packages/scvi/train/_trainrunner.py in __call__(self)
74 self.training_plan.n_obs_training = len(self.model.train_indices)
75
---> 76 self.trainer.fit(self.training_plan, self.data_splitter)
77 self._update_history()
78
/usr/local/lib/python3.7/dist-packages/scvi/train/_trainer.py in fit(self, *args, **kwargs)
164 message="`LightningModule.configure_optimizers` returned `None`",
165 )
--> 166 super().fit(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloader, val_dataloaders, datamodule)
458 )
459
--> 460 self._run(model)
461
462 assert self.state.stopped
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _run(self, model)
756
757 # dispatch `start_training` or `start_evaluating` or `start_predicting`
--> 758 self.dispatch()
759
760 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in dispatch(self)
797 self.accelerator.start_predicting(self)
798 else:
--> 799 self.accelerator.start_training(self)
800
801 def run_stage(self):
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/accelerators/accelerator.py in start_training(self, trainer)
94
95 def start_training(self, trainer: 'pl.Trainer') -> None:
---> 96 self.training_type_plugin.start_training(trainer)
97
98 def start_evaluating(self, trainer: 'pl.Trainer') -> None:
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py in start_training(self, trainer)
142 def start_training(self, trainer: 'pl.Trainer') -> None:
143 # double dispatch to initiate the training loop
--> 144 self._results = trainer.run_stage()
145
146 def start_evaluating(self, trainer: 'pl.Trainer') -> None:
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in run_stage(self)
807 if self.predicting:
808 return self.run_predict()
--> 809 return self.run_train()
810
811 def _pre_training_routine(self):
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in run_train(self)
869 with self.profiler.profile("run_training_epoch"):
870 # run train epoch
--> 871 self.train_loop.run_training_epoch()
872
873 if self.max_steps and self.max_steps <= self.global_step:
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/training_loop.py in run_training_epoch(self)
564
565 # handle epoch_output on epoch end
--> 566 self.on_train_epoch_end(epoch_output)
567
568 # log epoch metrics
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/training_loop.py in on_train_epoch_end(self, epoch_output)
604
605 # lightningmodule hook
--> 606 training_epoch_end_output = model.training_epoch_end(processed_epoch_output)
607
608 if training_epoch_end_output is not None:
/usr/local/lib/python3.7/dist-packages/scvi/train/_trainingplans.py in training_epoch_end(self, outputs)
139 # kl global same for each minibatch
140 kl_global = outputs[0]["kl_global"]
--> 141 elbo += kl_global
142 self.log("elbo_train", elbo / n_obs)
143 self.log("reconstruction_loss_train", rec_loss / n_obs)
RuntimeError: output with shape [] doesn't match the broadcast shape [1]
If anybody has an idea how to solve this error, I would be very thankful!
Building this model is just practice for a project in which I will have to create many different models, so I would also greatly appreciate any tips on how to go about creating new models with scvi-tools.
I have worked my way through the tutorials and looked at the skeleton, but I still keep running into errors while training my own models, and I often cannot really interpret them, since the train function feels like a bit of a black box to me.
With kindest regards from Cologne, Germany,
Lunas