Here is the code I ran :
sc.tl.rank_genes_groups(sco, layer='cluster_int', groupby='cluster_int', method='wilcoxon', corr_method = 'benjamini-hochberg', pts = True)
pattern = r'Rik$|Rik[0-9]$|^Gm[0-9]+|^AC[0-9]{6}'
results_list = [] # Whole result
results_short = [] # Shortened based on the z-score
logfc_threshold = 1
pval_threshold = 0.05
clusters = sco.obs['cluster_int'].unique()
n = 50
for cluster in clusters:
cluster_results = sc.get.rank_genes_groups_df(sco, group=cluster)
cluster_results['cluster'] = cluster
cluster_results = cluster_results.rename(columns={'names': 'gene'})
cluster_results = cluster_results.rename(columns={'scores': 'z-score'})
cluster_results = cluster_results.rename(columns={'pvals_adj': 'fdr'})
mask = cluster_results['gene'].str.contains(pattern, regex=True)
cluster_results = cluster_results[~mask]
# Filtering based on the pvalue and the logfold change
cluster_results = cluster_results[(cluster_results['pvals'] < pval_threshold) & (cluster_results['logfoldchanges'] > logfc_threshold)]
results_list.append(cluster_results)
# Make a smaller set based on the z-score
cluster_short = cluster_results.sort_values(by='z-score', ascending=False).head(n)
results_short.append(cluster_short)
results_df = pd.concat(results_list, ignore_index=True)
results_zscore = pd.concat(results_short, ignore_index=True)
top_genes = {}
for cluster in results_zscore['cluster'] :
top_genes[cluster] = results_zscore['gene'].tolist()
top_genes = {key: top_genes[cluster] for key in sorted(top_genes, key=int)}
sc.pl.rank_genes_groups_heatmap(sco, top_genes, layer='cluster_int', groupby='cluster_int', log=True, dendrogram=False)
After running it, I should expect a heatmap where the deg per cluster should be highlighted and form a “diagonal” on the graph. This is not the case.
I would like to have your advice on potential error ?
Also, the scale on the side of the heatmap is related to mean expression of the genes in group right ?