mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
Add scVelo RNA velocity analysis workflow and IQ-TREE reference documentation
- Introduced a comprehensive RNA velocity analysis pipeline using scVelo, including data loading, preprocessing, velocity estimation, and visualization.
- Added a script for running RNA velocity analysis with customizable parameters and output options.
- Created detailed documentation for IQ-TREE 2 phylogenetic inference, covering command syntax, model selection, bootstrapping methods, and output interpretation.
- Included references for velocity models and their mathematical framework, along with a comparison of different models.
- Enhanced the scVelo skill documentation with installation instructions, use cases, and best practices for RNA velocity analysis.
This commit is contained in:
232
scientific-skills/scvelo/scripts/rna_velocity_workflow.py
Normal file
232
scientific-skills/scvelo/scripts/rna_velocity_workflow.py
Normal file
@@ -0,0 +1,232 @@
|
||||
"""
|
||||
RNA Velocity Analysis Workflow using scVelo
|
||||
===========================================
|
||||
Complete pipeline from raw data to velocity visualization.
|
||||
|
||||
Usage:
|
||||
python rna_velocity_workflow.py
|
||||
|
||||
Or import and use run_velocity_analysis() with your AnnData object.
|
||||
"""
|
||||
|
||||
import scvelo as scv
|
||||
import scanpy as sc
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
matplotlib.use('Agg') # Non-interactive backend
|
||||
import matplotlib.pyplot as plt
|
||||
import os
|
||||
|
||||
|
||||
def _save_velocity_figures(adata, color, mode, output_dir):
    """Write the velocity figures (stream, arrows, pseudotime, quality, ...) to ``output_dir``.

    ``color`` is the ``adata.obs`` column used to colour cells, or ``None`` to
    let scVelo fall back to its default colouring.
    """
    # Stream plot
    scv.pl.velocity_embedding_stream(
        adata,
        basis="umap",
        color=color,
        title="RNA Velocity",
        save=f"{output_dir}/velocity_stream.png",
    )

    # Arrow plot
    scv.pl.velocity_embedding(
        adata,
        arrow_length=3,
        arrow_size=2,
        color=color,
        basis="umap",
        save=f"{output_dir}/velocity_arrows.png",
    )

    # Pseudotime
    scv.pl.scatter(
        adata,
        color="velocity_pseudotime",
        cmap="gnuplot",
        title="Velocity Pseudotime",
        save=f"{output_dir}/pseudotime.png",
    )

    if mode == "dynamical" and "latent_time" in adata.obs:
        scv.pl.scatter(
            adata,
            color="latent_time",
            color_map="gnuplot",
            title="Latent Time",
            save=f"{output_dir}/latent_time.png",
        )

    # Speed and coherence
    scv.pl.scatter(
        adata,
        c=["velocity_length", "velocity_confidence"],
        cmap="coolwarm",
        perc=[5, 95],
        save=f"{output_dir}/velocity_quality.png",
    )

    # Top driver genes heatmap (dynamical only): the 50 genes with the
    # highest fit likelihood, ordered along latent time.
    if mode == "dynamical" and "fit_likelihood" in adata.var:
        top_genes = adata.var["fit_likelihood"].sort_values(ascending=False).index[:50]
        scv.pl.heatmap(
            adata,
            var_names=top_genes,
            sortby="latent_time",
            col_color=color,
            n_convolve=50,
            save=f"{output_dir}/driver_gene_heatmap.png",
        )


def _print_velocity_summary(adata, mode):
    """Print model, cell/gene counts and velocity-confidence statistics."""
    confidence = adata.obs["velocity_confidence"].dropna()
    high_confidence = confidence > 0.7

    # Report the genes actually flagged as velocity genes when that column
    # exists; otherwise fall back to the number of genes kept in the object.
    if "velocity_genes" in adata.var:
        n_velocity_genes = int(adata.var["velocity_genes"].sum())
    else:
        n_velocity_genes = adata.n_vars

    print("\nSummary:")
    print(f" Velocity model: {mode}")
    print(f" Cells: {adata.n_obs}")
    print(f" Velocity genes: {n_velocity_genes}")
    print(f" Mean velocity confidence: {confidence.mean():.3f}")
    print(f" High-confidence cells (>0.7): {high_confidence.sum()} ({high_confidence.mean():.1%})")

    if mode == "dynamical" and "fit_likelihood" in adata.var:
        good_genes = (adata.var["fit_likelihood"] > 0.1).sum()
        print(f" Well-fit genes (likelihood>0.1): {good_genes}")


def run_velocity_analysis(
    adata,
    groupby="leiden",
    n_top_genes=2000,
    n_neighbors=30,
    mode="dynamical",
    n_jobs=4,
    output_dir="velocity_results",
):
    """
    Complete RNA velocity analysis workflow.

    Validates the input, preprocesses it, fits the velocity model, computes
    confidence / pseudotime (and latent time in dynamical mode), writes
    figures and an ``adata_velocity.h5ad`` file into ``output_dir``, and
    prints a short summary.

    Parameters
    ----------
    adata : AnnData
        AnnData object with 'spliced' and 'unspliced' layers.
        Should already have UMAP and cluster annotations.
    groupby : str
        Column in adata.obs for cell type labels. If the column is absent,
        velocity-gene ranking is skipped and figures are drawn without
        cluster colouring instead of failing at plot time.
    n_top_genes : int
        Number of top highly variable genes.
    n_neighbors : int
        Number of neighbors for moment computation.
    mode : str
        Velocity model: 'stochastic' (fast) or 'dynamical' (accurate).
    n_jobs : int
        Parallel jobs for dynamical model fitting.
    output_dir : str
        Directory for saving output figures and the result h5ad file.

    Returns
    -------
    AnnData with velocity annotations.

    Raises
    ------
    AssertionError
        If the 'spliced' or 'unspliced' layer is missing.
    """
    # ── Validate input before touching the filesystem ─────────────────────────
    # Raised explicitly (not via `assert`) so the check survives `python -O`,
    # while keeping the exception type and message callers already see.
    for required_layer in ("spliced", "unspliced"):
        if required_layer not in adata.layers:
            raise AssertionError(
                f"Missing '{required_layer}' layer. Run velocyto first."
            )

    os.makedirs(output_dir, exist_ok=True)

    # ── Settings ──────────────────────────────────────────────────────────────
    scv.settings.verbosity = 2
    scv.settings.figdir = output_dir

    print(f"Input: {adata.n_obs} cells × {adata.n_vars} genes")

    # Decide once whether cluster labels are usable, so ranking and every
    # plot agree instead of the plots failing after the model has been fit.
    has_groups = groupby in adata.obs.columns
    plot_color = groupby if has_groups else None
    if not has_groups:
        print(
            f" Note: '{groupby}' not found in adata.obs; "
            "skipping gene ranking and cluster colouring"
        )

    # ── Step 1: Preprocessing ─────────────────────────────────────────────────
    print("Step 1/5: Preprocessing...")
    scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=n_top_genes)

    if "neighbors" not in adata.uns:
        sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=30)

    scv.pp.moments(adata, n_pcs=30, n_neighbors=n_neighbors)
    print(f" {adata.n_vars} genes retained after filtering")

    # ── Step 2: Velocity estimation ───────────────────────────────────────────
    print(f"Step 2/5: Fitting velocity model ({mode})...")
    if mode == "dynamical":
        # The dynamical model needs per-gene kinetics recovered first.
        scv.tl.recover_dynamics(adata, n_jobs=n_jobs)
    scv.tl.velocity(adata, mode=mode)
    scv.tl.velocity_graph(adata)
    print(" Velocity graph computed")

    # ── Step 3: Downstream analyses ───────────────────────────────────────────
    print("Step 3/5: Computing latent time and confidence...")
    scv.tl.velocity_confidence(adata)
    scv.tl.velocity_pseudotime(adata)

    if mode == "dynamical":
        scv.tl.latent_time(adata)

    if has_groups:
        scv.tl.rank_velocity_genes(adata, groupby=groupby, min_corr=0.3)

    # ── Step 4: Visualization ─────────────────────────────────────────────────
    print("Step 4/5: Generating figures...")
    _save_velocity_figures(adata, plot_color, mode, output_dir)

    # ── Step 5: Save results ──────────────────────────────────────────────────
    print("Step 5/5: Saving results...")
    output_h5ad = os.path.join(output_dir, "adata_velocity.h5ad")
    adata.write_h5ad(output_h5ad)
    print(f" Saved to {output_h5ad}")

    _print_velocity_summary(adata, mode)

    print(f"\nOutput files saved to: {output_dir}/")
    return adata
|
||||
|
||||
|
||||
def load_from_loom(loom_path, processed_h5ad=None):
    """
    Load velocity data from velocyto loom file.

    Args:
        loom_path: Path to velocyto output loom file
        processed_h5ad: Optional path to pre-processed Scanpy h5ad file

    Returns:
        When ``processed_h5ad`` is given, the result of merging the processed
        object with the loom data via ``scv.utils.merge``. Otherwise the loom
        object itself, after a basic Scanpy pipeline (normalization, log1p,
        highly variable genes, PCA, neighbors, UMAP, Leiden) has been run on it.
    """
    velocity_data = scv.read(loom_path, cache=True)

    if processed_h5ad:
        # Keep the processed metadata and add the velocity layers from the loom.
        annotated = sc.read_h5ad(processed_h5ad)
        return scv.utils.merge(annotated, velocity_data)

    # No processed object supplied: run a basic Scanpy pipeline on the loom data.
    sc.pp.normalize_total(velocity_data, target_sum=1e4)
    sc.pp.log1p(velocity_data)
    sc.pp.highly_variable_genes(velocity_data, n_top_genes=3000)
    sc.pp.pca(velocity_data)
    sc.pp.neighbors(velocity_data)
    sc.tl.umap(velocity_data)
    sc.tl.leiden(velocity_data, resolution=0.5)
    return velocity_data
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo entry point: runs the full workflow on scVelo's pancreas example dataset.
    print("scVelo RNA Velocity Workflow - Demo Mode")
    print("=" * 50)

    # Load example dataset
    adata = scv.datasets.pancreas()
    print(f"Loaded pancreas dataset: {adata}")

    # Run analysis
    demo_settings = {
        "groupby": "clusters",
        "n_top_genes": 2000,
        "mode": "dynamical",
        "n_jobs": 2,
        "output_dir": "pancreas_velocity",
    }
    adata = run_velocity_analysis(adata, **demo_settings)

    print("\nAnalysis complete!")
    print("Key results:")

    # Collect the result descriptions, then print them in one pass.
    result_lines = [
        " adata.layers['velocity']: velocity per gene per cell",
        " adata.obs['latent_time']: pseudotime from dynamics",
        " adata.obs['velocity_confidence']: per-cell confidence",
    ]
    if "rank_velocity_genes" in adata.uns:
        result_lines.append(" adata.uns['rank_velocity_genes']: driver genes per cluster")
    for result_line in result_lines:
        print(result_line)
|
||||
Reference in New Issue
Block a user