mirror of https://github.com/K-Dense-AI/claude-scientific-skills.git synced 2026-03-27 07:09:27 +08:00

Files

huangkuanlin 7f94783fab Add scVelo RNA velocity analysis workflow and IQ-TREE reference documentation

- Introduced a comprehensive RNA velocity analysis pipeline using scVelo, including data loading, preprocessing, velocity estimation, and visualization.
- Added a script for running RNA velocity analysis with customizable parameters and output options.
- Created detailed documentation for IQ-TREE 2 phylogenetic inference, covering command syntax, model selection, bootstrapping methods, and output interpretation.
- Included references for velocity models and their mathematical framework, along with a comparison of different models.
- Enhanced the scVelo skill documentation with installation instructions, use cases, and best practices for RNA velocity analysis.

2026-03-03 07:15:36 -05:00

5.5 KiB

Raw Blame History

MDAnalysis Analysis Reference

MDAnalysis Universe and AtomGroup

import MDAnalysis as mda

# Load Universe
u = mda.Universe("topology.pdb", "trajectory.dcd")
# or for single structure:
u = mda.Universe("structure.pdb")

# Key attributes
print(u.atoms.n_atoms)          # Total atoms
print(u.residues.n_residues)    # Total residues
print(u.trajectory.n_frames)   # Number of frames
print(u.trajectory.dt)         # Time step in ps
print(u.trajectory.totaltime)  # Total simulation time in ps

Atom Selection Language

MDAnalysis uses a rich selection language:

# Basic selections
protein = u.select_atoms("protein")
backbone = u.select_atoms("backbone")  # CA, N, C, O
calpha = u.select_atoms("name CA")
water = u.select_atoms("resname WAT or resname HOH or resname TIP3")
ligand = u.select_atoms("resname LIG")

# By residue number
region = u.select_atoms("resid 10:50")
specific = u.select_atoms("resid 45 and name CA")

# By proximity
near_ligand = u.select_atoms("protein and around 5.0 resname LIG")

# By property
charged = u.select_atoms("resname ARG LYS ASP GLU")
hydrophobic = u.select_atoms("resname ALA VAL LEU ILE PRO PHE TRP MET")

# Boolean combinations
active_site = u.select_atoms("(resid 100 102 145 200) and protein")

# Inverse
not_water = u.select_atoms("not (resname WAT HOH)")

Common Analysis Modules

RMSD and RMSF

from MDAnalysis.analysis import rms, align

# Align trajectory to first frame
align.AlignTraj(u, u, select='backbone', in_memory=True).run()

# RMSD
R = rms.RMSD(u, u, select='backbone', groupselections=['name CA'])
R.run()
# R.results.rmsd: shape (n_frames, 3) = [frame, time, RMSD]

# RMSF (per-atom fluctuations)
from MDAnalysis.analysis.rms import RMSF
rmsf = RMSF(u.select_atoms('backbone')).run()
# rmsf.results.rmsf: per-atom RMSF values in Angstroms

Radius of Gyration

rg = []
for ts in u.trajectory:
    rg.append(u.select_atoms("protein").radius_of_gyration())
import numpy as np
print(f"Mean Rg: {np.mean(rg):.2f} Å")

Secondary Structure Analysis

from MDAnalysis.analysis.dssp import DSSP

# DSSP secondary structure assignment per frame
dssp = DSSP(u).run()
# dssp.results.dssp: per-residue per-frame secondary structure codes
# H = alpha-helix, E = beta-strand, C = coil

Hydrogen Bonds

from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAnalysis

hbonds = HydrogenBondAnalysis(
    u,
    donors_sel="protein and name N",
    acceptors_sel="protein and name O",
    d_h_cutoff=1.2,          # donor-H distance (Å)
    d_a_cutoff=3.0,          # donor-acceptor distance (Å)
    d_h_a_angle_cutoff=150   # D-H-A angle (degrees)
)
hbonds.run()

# Count H-bonds per frame
import pandas as pd
df = pd.DataFrame(hbonds.results.hbonds,
                  columns=['frame', 'donor_ix', 'hydrogen_ix', 'acceptor_ix',
                           'DA_dist', 'DHA_angle'])

Principal Component Analysis (PCA)

from MDAnalysis.analysis import pca

pca_analysis = pca.PCA(u, select='backbone', align=True).run()

# PC variances
print(pca_analysis.results.variance[:5])  # % variance of first 5 PCs

# Project trajectory onto PCs
projected = pca_analysis.transform(u.select_atoms('backbone'), n_components=3)
# Shape: (n_frames, n_components)

Free Energy Surface (FES)

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

def plot_free_energy_surface(x, y, bins=50, T=300, xlabel="PC1", ylabel="PC2",
                              output="fes.png"):
    """
    Compute 2D free energy surface from two order parameters.
    FES = -kT * ln(P(x,y))
    """
    kB = 0.0083144621  # kJ/mol/K
    kT = kB * T

    # 2D histogram
    H, xedges, yedges = np.histogram2d(x, y, bins=bins, density=True)
    H = H.T

    # Free energy
    H_safe = np.where(H > 0, H, np.nan)
    fes = -kT * np.log(H_safe)
    fes -= np.nanmin(fes)  # Shift minimum to 0

    # Plot
    fig, ax = plt.subplots(figsize=(8, 6))
    im = ax.contourf(xedges[:-1], yedges[:-1], fes, levels=20, cmap='RdYlBu_r')
    plt.colorbar(im, ax=ax, label='Free Energy (kJ/mol)')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.savefig(output, dpi=150, bbox_inches='tight')
    return fig

Trajectory Formats Supported

Format	Extension	Notes
DCD	`.dcd`	CHARMM/NAMD binary; widely used
XTC	`.xtc`	GROMACS compressed
TRR	`.trr`	GROMACS full precision
NetCDF	`.nc`	AMBER format
LAMMPS	`.lammpstrj`	LAMMPS dump
HDF5	`.h5md`	H5MD standard
PDB	`.pdb`	Multi-model PDB

MDAnalysis Interoperability

# Convert to numpy
positions = u.atoms.positions  # Current frame: shape (N, 3)

# Write to PDB
with mda.Writer("frame_10.pdb", u.atoms.n_atoms) as W:
    u.trajectory[10]  # Move to frame 10
    W.write(u.atoms)

# Write trajectory subset
with mda.Writer("protein_traj.dcd", u.select_atoms("protein").n_atoms) as W:
    for ts in u.trajectory:
        W.write(u.select_atoms("protein"))

# Convert to MDTraj (for compatibility)
# import mdtraj as md
# traj = md.load("trajectory.dcd", top="topology.pdb")

Performance Tips

Use in_memory=True for AlignTraj when RAM allows (much faster iteration)
Select minimal atoms before analysis to reduce memory/compute
Use multiprocessing for independent frame analyses
Process in chunks for very long trajectories using start/stop/step parameters:

# Analyze every 10th frame from frame 100 to 1000
R.run(start=100, stop=1000, step=10)

5.5 KiB Raw Blame History

MDAnalysis Analysis Reference

MDAnalysis Universe and AtomGroup

Atom Selection Language

Common Analysis Modules

RMSD and RMSF

Radius of Gyration

Secondary Structure Analysis

Hydrogen Bonds

Principal Component Analysis (PCA)

Free Energy Surface (FES)

Trajectory Formats Supported

MDAnalysis Interoperability

Performance Tips

5.5 KiB

Raw Blame History