`tl.ingest` not working: `KeyError: 'params'` (MWE using `sca.datasets`)

The same error also appears when I integrate my own data (i.e., data that does not come from `sca.datasets`).

Minimal working example (taken from the `tl.ingest` docs):

import scanpy as sca

adata_check_ref = sca.datasets.pbmc3k_processed()  # this is an earlier version of the dataset from the pbmc3k tutorial
adata_check = sca.datasets.pbmc68k_reduced()

var_names = adata_check_ref.var_names.intersection(adata_check.var_names)
adata_check_ref = adata_check_ref[:, var_names]
adata_check = adata_check[:, var_names]

sca.tl.ingest(adata_check, adata_check_ref, obs='louvain')

Error

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Input In [49], in <cell line: 10>()
      7 adata_check_ref = adata_check_ref[:, var_names]
      8 adata_check = adata_check[:, var_names]
---> 10 sca.tl.ingest(adata_check, adata_check_ref, obs='louvain')

File /usr/local/python/lib/python3.8/site-packages/scanpy/tools/_ingest.py:126, in ingest(adata, adata_ref, obs, embedding_method, labeling_method, neighbors_key, inplace, **kwargs)
    123 if len(labeling_method) == 1 and len(obs or []) > 1:
    124     labeling_method = labeling_method * len(obs)
--> 126 ing = Ingest(adata_ref, neighbors_key)
    127 ing.fit(adata)
    129 for method in embedding_method:

File /usr/local/python/lib/python3.8/site-packages/scanpy/tools/_ingest.py:380, in Ingest.__init__(self, adata, neighbors_key)
    377 self._use_pynndescent = False
    379 if 'pca' in adata.uns:
--> 380     self._init_pca(adata)
    382 if neighbors_key is None:
    383     neighbors_key = 'neighbors'

File /usr/local/python/lib/python3.8/site-packages/scanpy/tools/_ingest.py:355, in Ingest._init_pca(self, adata)
    354 def _init_pca(self, adata):
--> 355     self._pca_centered = adata.uns['pca']['params']['zero_center']
    356     self._pca_use_hvg = adata.uns['pca']['params']['use_highly_variable']
    358     if self._pca_use_hvg and 'highly_variable' not in adata.var.keys():

KeyError: 'params'

Environment

absl-py==1.0.0
anndata==0.8.0
argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
asttokens==2.0.5
attrs==21.4.0
backcall==0.2.0
beautifulsoup4==4.11.1
bleach==5.0.0
cachetools==5.2.0
certifi==2022.5.18.1
cffi==1.15.0
charset-normalizer==2.0.12
cycler==0.11.0
debugpy==1.6.0
decorator==5.1.1
defusedxml==0.7.1
entrypoints==0.4
executing==0.8.3
fa2==0.3.5
fastjsonschema==2.15.3
fonttools==4.33.3
GEOparse==2.0.3
google-auth==2.6.6
google-auth-oauthlib==0.4.6
grpcio==1.46.3
h5py==3.7.0
idna==3.3
igraph==0.9.10
importlib-metadata==4.11.4
importlib-resources==5.7.1
ipykernel==6.13.0
ipython==8.4.0
ipython-genutils==0.2.0
ipywidgets==7.7.0
jedi==0.18.1
Jinja2==3.1.2
joblib==1.1.0
jsonschema==4.5.1
jupyter-client==7.3.1
jupyter-console==6.4.3
jupyter-core==4.10.0
jupyterlab-pygments==0.2.2
jupyterlab-widgets==1.1.0
kiwisolver==1.4.2
leidenalg==0.8.10
llvmlite==0.38.1
Markdown==3.3.7
MarkupSafe==2.1.1
matplotlib==3.5.2
matplotlib-inline==0.1.3
mistune==0.8.4
natsort==8.1.0
nbclient==0.6.3
nbconvert==6.5.0
nbformat==5.4.0
nest-asyncio==1.5.5
networkx==2.8.2
notebook==6.4.11
numba==0.55.1
numpy==1.21.6
oauthlib==3.2.0
packaging==21.3
pandas==1.4.2
pandocfilters==1.5.0
parso==0.8.3
patsy==0.5.2
pexpect==4.8.0
pickleshare==0.7.5
Pillow==9.1.1
prometheus-client==0.14.1
prompt-toolkit==3.0.29
protobuf==3.20.1
psutil==5.9.1
ptyprocess==0.7.0
pure-eval==0.2.2
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.21
Pygments==2.12.0
pynndescent==0.5.7
pyparsing==3.0.9
pyrsistent==0.18.1
python-dateutil==2.8.2
pytz==2022.1
pyzmq==23.0.0
requests==2.27.1
requests-oauthlib==1.3.1
rsa==4.8
scanpy==1.9.1
scikit-learn==1.1.1
scipy==1.8.1
seaborn==0.11.2
Send2Trash==1.8.0
session-info==1.0.0
six==1.16.0
soupsieve==2.3.2.post1
stack-data==0.2.0
statsmodels==0.13.2
stdlib-list==0.8.0
tensorboard==2.9.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.1
terminado==0.15.0
texttable==1.6.4
threadpoolctl==3.1.0
tinycss2==1.1.1
torch-tb-profiler==0.4.0
tornado==6.1
tqdm==4.64.0
traitlets==5.2.1.post0
typing_extensions==4.2.0
umap-learn==0.5.3
urllib3==1.26.9
wcwidth==0.2.5
webencodings==0.5.1
Werkzeug==2.1.2
widgetsnbextension==3.6.0
zipp==3.8.0
--extra-index-url https://download.pytorch.org/whl/cu113
torch==1.11.0+cu113
torchvision==0.12.0+cu113
torchaudio==0.11.0+cu113

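Hm, it looks like the reference AnnData object has no stored parameters from running the PCA: its `.uns['pca']` entry exists but lacks the `'params'` key that `ingest` reads. You can rerun the PCA on the reference object and then run ingest.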

1 Like

Thanks! The following works:

import scanpy as sca

adata_check_ref = sca.datasets.pbmc3k_processed()  # this is an earlier version of the dataset from the pbmc3k tutorial
adata_check = sca.datasets.pbmc68k_reduced()

# Add mappings to `tl.ingest` reference (all three following lines are necessary)
sca.pp.pca(adata_check_ref)
sca.pp.neighbors(adata_check_ref)
sca.tl.umap(adata_check_ref)

var_names = adata_check_ref.var_names.intersection(adata_check.var_names)
adata_check_ref = adata_check_ref[:, var_names]
adata_check = adata_check[:, var_names]

sca.tl.ingest(adata_check, adata_check_ref, obs='louvain')