Environment setup¶
In [ ]:
# Load the packages
import gc
import scanpy as sc
import decoupler as dc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every Python warning for the rest of the session. This also hides
# deprecation notices from the libraries above, so comment it out when debugging.
warnings.filterwarnings('ignore')
# Set the system path
import os
import sys
# Append the directory two levels above the current working directory to the
# import path (presumably the repository root, so that the `Code` package below
# resolves). The path is relative, so it depends on where the kernel was started.
sys.path.append(os.path.abspath('../../'))
# Star import: pulls every public name of the project helper module into this
# namespace. This line must stay AFTER the sys.path.append above.
# NOTE(review): which of these names the notebook relies on is not visible from
# here — consider importing them explicitly.
from Code.Utils.Preprocessing_scanpy import *
# Plotting options, change to your liking.
# Keep every option in ONE call: set_figure_params re-applies its defaults
# (dpi=80, frameon=True, ...) for each argument that is omitted, so splitting
# the options across several calls lets later calls silently undo earlier ones.
sc.settings.set_figure_params(dpi=300, frameon=False, figsize=(4, 4))
Load Data¶
In [ ]:
# Read the preprocessed example dataset (an .h5ad file) into an AnnData object.
adata = sc.read(filename='../../data/Preprocessed_data/harmony_cellmarkers.h5ad')
# Bare last expression: the notebook renders the AnnData summary as the cell output.
adata
Out[ ]:
AnnData object with n_obs × n_vars = 7391 × 33694 obs: 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'cell_type' var: 'mean', 'std', 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'batch_colors', 'cell_type_colors', 'dendrogram_cell_type', 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'umap' obsm: 'X_pca', 'X_pca_harmony', 'X_umap', 'ora_estimate', 'ora_pvals' varm: 'PCs' layers: 'log_norm', 'norm', 'raw', 'scale_data' obsp: 'connectivities', 'distances'
Differential gene expression¶
In [ ]:
# Rank genes for each 'cell_type' group with several statistical methods.
# Every run is written to adata.uns[<key_added>], so each method keeps its own
# result and later runs do not overwrite earlier ones.
# For 't-test' and 'wilcoxon' the stored entry includes 'params', 'names',
# 'scores', 'pvals', 'pvals_adj' and 'logfoldchanges'.
# NOTE(review): 'logreg' appears to store only a subset of these fields
# (no p-values / fold changes) — verify before reading them from adata.uns["logreg"].
# NOTE(review): rank_genes_groups expects logarithmized data; the loaded object
# also carries 'log_norm' and 'scale_data' layers — confirm that adata.X (or
# adata.raw) holds log-normalized values, or pass layer='log_norm' explicitly.
de_runs = (
    ('t-test', "t-test"),
    # ('t-test_overestim_var', "t-test_ov"),
    ('wilcoxon', "wilcoxon"),
    ('logreg', "logreg"),
)
for de_method, result_key in de_runs:
    sc.tl.rank_genes_groups(adata, 'cell_type', method=de_method, key_added=result_key)
Top differential genes visualization¶
In [ ]:
# Show the top 5 ranked genes per cell type from the Wilcoxon run in four views:
# heatmap, dot plot, stacked violin plot and matrix plot.
# The arguments shared by all four plots are defined once to keep them in sync.
top_marker_kwargs = dict(n_genes=5, key="wilcoxon", groupby="cell_type")
sc.pl.rank_genes_groups_heatmap(adata, show_gene_labels=True, **top_marker_kwargs)
sc.pl.rank_genes_groups_dotplot(adata, **top_marker_kwargs)
sc.pl.rank_genes_groups_stacked_violin(adata, **top_marker_kwargs)
sc.pl.rank_genes_groups_matrixplot(adata, **top_marker_kwargs)