Tutorial 3: Slide-seq or Stereo-seq dataset

In this tutorial, we show how to apply scSTADE to identify spatial domains in Slide-seq or Stereo-seq data. As an example, we analyse a Stereo-seq dataset.

The source code package is freely available at https://github.com/cuiyaxuan/scSTADE/tree/master. The datasets used in this study can be found at https://drive.google.com/drive/folders/1H-ymfCqlDR1wpMRX-bCewAjG5nOrIF51?usp=sharing.

import sys
sys.path.append("/.../scSTADE-master/scSTADE_Cluster_Functions")
# Replace the path above with the location of the scSTADE_Cluster_Functions directory on your machine.
from scSTADE import scSTADE
import os
import torch
import pandas as pd
import scanpy as sc
from sklearn import metrics
import multiprocessing as mp

def setup_seed(seed=41):
    """Seed every random number generator used in this tutorial.

    The same value is given to Python's ``random`` module, NumPy's global
    generator and PyTorch. When CUDA is available, the CUDA generators are
    seeded as well and cuDNN's ``deterministic`` flag is switched on.

    Args:
        seed: Integer seed shared by all generators (default 41).
    """
    import random

    import numpy as np
    import torch

    # CPU-side generators.
    random.seed(seed)  # Python random module.
    np.random.seed(seed)  # Numpy module.
    torch.manual_seed(seed)

    # Nothing else to configure when no GPU is present.
    if not torch.cuda.is_available():
        return

    # torch.backends.cudnn.benchmark is not modified here.
    torch.backends.cudnn.deterministic = True
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

setup_seed(41)

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


n_clusters = 10  # the number of spatial domains
# Please replace 'file_path' with the download path.
file_path = '/home/cuiyaxuan/spatialLIBD/5.Mouse_Olfactory/'
# os.path.join keeps the path valid whether or not 'file_path' ends with a
# path separator (plain string concatenation silently required one).
adata = sc.read_h5ad(os.path.join(file_path, 'filtered_feature_bc_matrix.h5ad'))  # project name
adata.var_names_make_unique()

# Train scSTADE; train() returns the AnnData object used by the steps below.
model = scSTADE(adata, datatype='Slide', device=device, n_top_genes=4000)
adata = model.train()

radius = 50  # forwarded to clustering(); see utils.clustering for its meaning
tool = 'mclust'  # mclust, leiden, or louvain
from utils import clustering

if tool == 'mclust':
    clustering(adata, n_clusters, radius=radius, method=tool, refinement=True)
elif tool in ('leiden', 'louvain'):
    # NOTE(review): start/end/increment look like a resolution search range
    # for the graph-based methods - confirm against utils.clustering.
    clustering(adata, n_clusters, radius=radius, method=tool, start=0.1, end=2.0, increment=0.01, refinement=False)
else:
    # Fail here with a clear message rather than with a KeyError on
    # adata.obs['domain'] further down.
    raise ValueError(
        "Unsupported clustering tool %r; expected 'mclust', 'leiden' or 'louvain'." % (tool,)
    )

# Domain labels (presumably written to adata.obs by clustering()) are saved
# so that the visualisation step can reload them from label.csv.
domains = adata.obs['domain']
domains.to_csv("label.csv")

/home/cuiyaxuan/anaconda3/envs/pytorch/lib/python3.8/site-packages/scanpy/preprocessing/_highly_variable_genes.py:62: UserWarning: flavor='seurat_v3' expects raw count data, but non-integers were found.
  warnings.warn(

Graph constructed!
Building sparse matrix ...
Begin to train ST data...

0%|                                                   | 0/500 [00:00<?, ?it/s]

0%|                                           | 1/500 [00:00<04:24,  1.89it/s]

0%|▏                                          | 2/500 [00:00<04:01,  2.07it/s]

1%|▎                                          | 3/500 [00:01<03:53,  2.13it/s]

1%|▎                                          | 4/500 [00:01<03:49,  2.16it/s]

100%|█████████████████████████████████████████| 500/500 [03:49<00:00,  2.17it/s]

Optimization finished for ST data!

R[write to console]:                    __           __
   ____ ___  _____/ /_  _______/ /_
  / __ `__ \/ ___/ / / / / ___/ __/
 / / / / / / /__/ / /_/ (__  ) /_
/_/ /_/ /_/\___/_/\__,_/____/\__/   version 6.0.0
Type 'citation("mclust")' for citing this R package in publications.

fitting ...
  |======================================================================| 100%

import os
import matplotlib as mpl
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import visual_high

# Type 42 (TrueType) fonts keep text editable in the exported PDF.
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams["font.sans-serif"] = "Arial"
# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Please replace 'file_path' with the download path.
file_path = '/home/cuiyaxuan/spatialLIBD/5.Mouse_Olfactory/'
# os.path.join keeps the path valid whether or not 'file_path' ends with a
# path separator (plain string concatenation silently required one).
adata = sc.read_h5ad(os.path.join(file_path, 'filtered_feature_bc_matrix.h5ad'))  # project name

# Reload the domain labels written by the clustering step; the first CSV
# column is used as the index.
df_label = pd.read_csv('./label.csv', index_col=0)
visual_high.visual(adata, df_label)

#cells after MT filter: 19109
WARNING: saving figure to file figures/spatialHippocampus.pdf