I’m new to Python, Scanpy, and AnnData, so please let me know if I have made any mistakes. This error has been confusing me for a long time.
Background: I downloaded an h5ad file from a dataset and loaded it successfully using Scanpy. However, I found that its var_names contain the gene ID joined to the gene symbol (e.g. ENSG00000243485_MIR1302-2HG), so I want to set var_names to the gene symbols and then call var_names_make_unique().
import scanpy as sc
LW119 = sc.read_h5ad('/data/.../adata_LW119.h5ad')
LW119.var
feature symbol
ENSG00000243485_MIR1302-2HG ENSG00000243485_MIR1302-2HG MIR1302-2HG
ENSG00000237613_FAM138A ENSG00000237613_FAM138A FAM138A
ENSG00000186092_OR4F5 ENSG00000186092_OR4F5 OR4F5
ENSG00000238009_AL627309.1 ENSG00000238009_AL627309.1 AL627309.1
ENSG00000239945_AL627309.3 ENSG00000239945_AL627309.3 AL627309.3
... ... ...
ENSG00000277856_AC233755.2 ENSG00000277856_AC233755.2 AC233755.2
ENSG00000275063_AC233755.1 ENSG00000275063_AC233755.1 AC233755.1
ENSG00000271254_AC240274.1 ENSG00000271254_AC240274.1 AC240274.1
ENSG00000277475_AC213203.1 ENSG00000277475_AC213203.1 AC213203.1
ENSG00000268674_FAM231C ENSG00000268674_FAM231C FAM231C
33538 rows × 2 columns
LW119.var_names = LW119.var.symbol
LW119.var
symbol feature symbol
MIR1302-2HG ENSG00000243485_MIR1302-2HG MIR1302-2HG
FAM138A ENSG00000237613_FAM138A FAM138A
OR4F5 ENSG00000186092_OR4F5 OR4F5
AL627309.1 ENSG00000238009_AL627309.1 AL627309.1
AL627309.3 ENSG00000239945_AL627309.3 AL627309.3
... ... ...
AC233755.2 ENSG00000277856_AC233755.2 AC233755.2
AC233755.1 ENSG00000275063_AC233755.1 AC233755.1
AC240274.1 ENSG00000271254_AC240274.1 AC240274.1
AC213203.1 ENSG00000277475_AC213203.1 AC213203.1
FAM231C ENSG00000268674_FAM231C FAM231C
33538 rows × 2 columns
LW119.var_names
CategoricalIndex(['MIR1302-2HG', 'FAM138A', 'OR4F5', 'AL627309.1',
'AL627309.3', 'AL627309.2', 'AL627309.4', 'AL732372.1',
'OR4F29', 'AC114498.1',
...
'AC007325.2', 'BX072566.1', 'AL354822.1', 'AC023491.2',
'AC004556.1', 'AC233755.2', 'AC233755.1', 'AC240274.1',
'AC213203.1', 'FAM231C'],
categories=['A1BG', 'A1BG-AS1', 'A1CF', 'A2M', 'A2M-AS1', 'A2ML1', 'A2ML1-AS1', 'A2ML1-AS2', ...], ordered=False, dtype='category', name='symbol', length=33538)
LW119.var_names_make_unique()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[22], line 1
----> 1 LW119.var_names_make_unique()
File /mnt/wangd/anaconda3/lib/python3.10/site-packages/anndata/_core/anndata.py:1894, in AnnData.var_names_make_unique(self, join)
1892 def var_names_make_unique(self, join: str = "-"):
1893 # Important to go through the setter so obsm dataframes are updated too
-> 1894 self.var_names = utils.make_index_unique(self.var.index, join)
File /mnt/wangd/anaconda3/lib/python3.10/site-packages/anndata/utils.py:253, in make_index_unique(index, join)
251 if tentative_new_name not in values_set:
252 values_set.add(tentative_new_name)
--> 253 values_dup[i] = tentative_new_name
254 break
255 issue_interpretation_warning = True
File /mnt/wangd/anaconda3/lib/python3.10/site-packages/pandas/core/arrays/_mixins.py:249, in NDArrayBackedExtensionArray.__setitem__(self, key, value)
247 def __setitem__(self, key, value):
248 key = check_array_indexer(self, key)
--> 249 value = self._validate_setitem_value(value)
250 self._ndarray[key] = value
File /mnt/wangd/anaconda3/lib/python3.10/site-packages/pandas/core/arrays/categorical.py:1457, in Categorical._validate_setitem_value(self, value)
1455 return self._validate_listlike(value)
1456 else:
-> 1457 return self._validate_scalar(value)
File /mnt/wangd/anaconda3/lib/python3.10/site-packages/pandas/core/arrays/categorical.py:1484, in Categorical._validate_scalar(self, fill_value)
1482 fill_value = self._unbox_scalar(fill_value)
1483 else:
-> 1484 raise TypeError(
1485 "Cannot setitem on a Categorical with a new "
1486 f"category ({fill_value}), set the categories first"
1487 )
1488 return fill_value
TypeError: Cannot setitem on a Categorical with a new category (RGS5-1), set the categories first