How to convert mouse gene_id to Entrez gene ids

Hi,
I used the following code to get biomart gene annotation for my mouse genes:
annot = sc.queries.biomart_annotations(
"mmusculus",
["ensembl_gene_id", "entrez_gene_id", "start_position", "end_position", "chromosome_name"],
).set_index("ensembl_gene_id")
annot

but got error:
KeyError Traceback (most recent call last)
File ~/ENTER/lib/python3.9/site-packages/pybiomart/dataset.py:243, in Dataset.query(self, attributes, filters, only_unique, use_attr_names)
242 try:
→ 243 attr = self.attributes[name]
244 self._add_attr_node(dataset, attr)

KeyError: ‘entrez_gene_id’

During handling of the above exception, another exception occurred:

BiomartException Traceback (most recent call last)
Input In [62], in <cell line: 1>()
----> 1 annot = sc.queries.biomart_annotations(
2 “mmusculus”,
3 [“ensembl_gene_id”, “entrez_gene_id”,“start_position”, “end_position”, “chromosome_name”],
4 ).set_index(“ensembl_gene_id”)
5 annot

File ~/ENTER/lib/python3.9/site-packages/scanpy/queries/_queries.py:108, in biomart_annotations(org, attrs, host, use_cache)
74 @_doc_params(doc_org=_doc_org, doc_host=_doc_host, doc_use_cache=_doc_use_cache)
75 def biomart_annotations(
76 org: str,
(…)
80 use_cache: bool = False,
81 ) → pd.DataFrame:
82 “”"
83 Retrieve gene annotations from ensembl biomart.
84
(…)
106 >>> adata.var[annot.columns] = annot
107 “”"
→ 108 return simple_query(org=org, attrs=attrs, host=host, use_cache=use_cache)

File ~/ENTER/lib/python3.9/site-packages/scanpy/queries/_queries.py:70, in simple_query(org, attrs, filters, host, use_cache)
66 server = Server(host, use_cache=use_cache)
67 dataset = server.marts[“ENSEMBL_MART_ENSEMBL”].datasets[
68 “{}_gene_ensembl”.format(org)
69 ]
—> 70 res = dataset.query(attributes=attrs, filters=filters, use_attr_names=True)
71 return res

File ~/ENTER/lib/python3.9/site-packages/pybiomart/dataset.py:246, in Dataset.query(self, attributes, filters, only_unique, use_attr_names)
244 self._add_attr_node(dataset, attr)
245 except KeyError:
→ 246 raise BiomartException(
247 'Unknown attribute {}, check dataset attributes ’
248 ‘for a list of valid attributes.’.format(name))
250 if filters is not None:
251 # Add filter elements.
252 for name, value in filters.items():

BiomartException: Unknown attribute entrez_gene_id, check dataset attributes for a list of valid attributes.

I don’t know how to check the dataset attributes and see whether the mmusculus dataset contains the entrez_gene_ids. Could anyone help?

This is for running the CellO cell type annotation, which requires that the input dataset specify either HUGO gene symbols or Entrez gene IDs. So does scanpy have a function to convert gene ids to HUGO gene symbols or Entrez gene IDs?

thank you!
Ting

I have used R biomaRt to list the attributes of the mmusculus dataset. The correct attribute name is "entrezgene_id" (not "entrez_gene_id"). With that name, I got the annot dataframe, which contains the Entrez gene ids.

But now I encountered another issue when I used:

cello_data2.var[annot.columns] = annot

to map the gene ids in my AnnData object (cello_data2) to Entrez gene ids and add the Entrez gene ids to it. I got the error:

ValueError Traceback (most recent call last)
Input In [84], in <cell line: 1>()
----> 1 cello_data2.var[annot.columns] = annot

File ~/ENTER/lib/python3.9/site-packages/pandas/core/frame.py:3643, in DataFrame.__setitem__(self, key, value)
3641 self._setitem_frame(key, value)
3642 elif isinstance(key, (Series, np.ndarray, list, Index)):
→ 3643 self._setitem_array(key, value)
3644 elif isinstance(value, DataFrame):
3645 self._set_item_frame_value(key, value)

File ~/ENTER/lib/python3.9/site-packages/pandas/core/frame.py:3687, in DataFrame._setitem_array(self, key, value)
3685 check_key_length(self.columns, key, value)
3686 for k1, k2 in zip(key, value.columns):
→ 3687 self[k1] = value[k2]
3689 elif not is_list_like(value):
3690 for col in key:

File ~/ENTER/lib/python3.9/site-packages/pandas/core/frame.py:3655, in DataFrame.__setitem__(self, key, value)
3652 self._setitem_array([key], value)
3653 else:
3654 # set column
→ 3655 self._set_item(key, value)

File ~/ENTER/lib/python3.9/site-packages/pandas/core/frame.py:3832, in DataFrame._set_item(self, key, value)
3822 def _set_item(self, key, value) → None:
3823 “”"
3824 Add series to DataFrame in specified column.
3825
(…)
3830 ensure homogeneity.
3831 “”"
→ 3832 value = self._sanitize_column(value)
3834 if (
3835 key in self.columns
3836 and value.ndim == 1
3837 and not is_extension_array_dtype(value)
3838 ):
3839 # broadcast across multiple columns if necessary
3840 if not self.columns.is_unique or isinstance(self.columns, MultiIndex):

File ~/ENTER/lib/python3.9/site-packages/pandas/core/frame.py:4532, in DataFrame._sanitize_column(self, value)
4530 # We should never get here with DataFrame value
4531 if isinstance(value, Series):
→ 4532 return _reindex_for_setitem(value, self.index)
4534 if is_list_like(value):
4535 com.require_length_match(value, self.index)

File ~/ENTER/lib/python3.9/site-packages/pandas/core/frame.py:10999, in _reindex_for_setitem(value, index)
10995 except ValueError as err:
10996 # raised in MultiIndex.from_tuples, see test_insert_error_msmgs
10997 if not value.index.is_unique:
10998 # duplicate axis

10999 raise err
11001 raise TypeError(
11002 "incompatible index of inserted column with frame index"
11003 ) from err
11004 return reindexed_value

File ~/ENTER/lib/python3.9/site-packages/pandas/core/frame.py:10994, in _reindex_for_setitem(value, index)
10992 # GH#4107
10993 try:

10994 reindexed_value = value.reindex(index)._values
10995 except ValueError as err:
10996 # raised in MultiIndex.from_tuples, see test_insert_error_msmgs
10997 if not value.index.is_unique:
10998 # duplicate axis

File ~/ENTER/lib/python3.9/site-packages/pandas/core/series.py:4672, in Series.reindex(self, *args, **kwargs)
4668 raise TypeError(
4669 “‘index’ passed as both positional and keyword argument”
4670 )
4671 kwargs.update({“index”: index})
→ 4672 return super().reindex(**kwargs)

File ~/ENTER/lib/python3.9/site-packages/pandas/core/generic.py:4966, in NDFrame.reindex(self, *args, **kwargs)
4963 return self._reindex_multi(axes, copy, fill_value)
4965 # perform the reindex on the axes
→ 4966 return self._reindex_axes(
4967 axes, level, limit, tolerance, method, fill_value, copy
4968 ).__finalize__(self, method="reindex")

File ~/ENTER/lib/python3.9/site-packages/pandas/core/generic.py:4986, in NDFrame._reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
4981 new_index, indexer = ax.reindex(
4982 labels, level=level, limit=limit, tolerance=tolerance, method=method
4983 )
4985 axis = self._get_axis_number(a)
→ 4986 obj = obj._reindex_with_indexers(
4987 {axis: [new_index, indexer]},
4988 fill_value=fill_value,
4989 copy=copy,
4990 allow_dups=False,
4991 )
4992 # If we’ve made a copy once, no need to make another one
4993 copy = False

File ~/ENTER/lib/python3.9/site-packages/pandas/core/generic.py:5032, in NDFrame._reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
5029 indexer = ensure_platform_int(indexer)
5031 # TODO: speed up on homogeneous DataFrame objects (see _reindex_multi)
→ 5032 new_data = new_data.reindex_indexer(
5033 index,
5034 indexer,
5035 axis=baxis,
5036 fill_value=fill_value,
5037 allow_dups=allow_dups,
5038 copy=copy,
5039 )
5040 # If we’ve made a copy once, no need to make another one
5041 copy = False

File ~/ENTER/lib/python3.9/site-packages/pandas/core/internals/managers.py:679, in BaseBlockManager.reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy, consolidate, only_slice, use_na_proxy)
677 # some axes don’t allow reindexing with dups
678 if not allow_dups:
→ 679 self.axes[axis]._validate_can_reindex(indexer)
681 if axis >= self.ndim:
682 raise IndexError(“Requested axis not found in manager”)

File ~/ENTER/lib/python3.9/site-packages/pandas/core/indexes/base.py:4107, in Index._validate_can_reindex(self, indexer)
4105 # trying to reindex on an axis with duplicates
4106 if not self._index_as_unique and len(indexer):
→ 4107 raise ValueError(“cannot reindex on an axis with duplicate labels”)

ValueError: cannot reindex on an axis with duplicate labels

So, how can I solve this?