mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
- Introduced a comprehensive RNA velocity analysis pipeline using scVelo, including data loading, preprocessing, velocity estimation, and visualization. - Added a script for running RNA velocity analysis with customizable parameters and output options. - Created detailed documentation for IQ-TREE 2 phylogenetic inference, covering command syntax, model selection, bootstrapping methods, and output interpretation. - Included references for velocity models and their mathematical framework, along with a comparison of different models. - Enhanced the scVelo skill documentation with installation instructions, use cases, and best practices for RNA velocity analysis.
5.5 KiB
5.5 KiB
MDAnalysis Analysis Reference
MDAnalysis Universe and AtomGroup
import MDAnalysis as mda
# Load Universe
u = mda.Universe("topology.pdb", "trajectory.dcd")
# or for single structure:
u = mda.Universe("structure.pdb")
# Key attributes
print(u.atoms.n_atoms) # Total atoms
print(u.residues.n_residues) # Total residues
print(u.trajectory.n_frames) # Number of frames
print(u.trajectory.dt) # Time step in ps
print(u.trajectory.totaltime) # Total simulation time in ps
Atom Selection Language
MDAnalysis uses a rich selection language:
# Basic selections
protein = u.select_atoms("protein")
backbone = u.select_atoms("backbone") # CA, N, C, O
calpha = u.select_atoms("name CA")
water = u.select_atoms("resname WAT or resname HOH or resname TIP3")
ligand = u.select_atoms("resname LIG")
# By residue number
region = u.select_atoms("resid 10:50")
specific = u.select_atoms("resid 45 and name CA")
# By proximity
near_ligand = u.select_atoms("protein and around 5.0 resname LIG")
# By property
charged = u.select_atoms("resname ARG LYS ASP GLU")
hydrophobic = u.select_atoms("resname ALA VAL LEU ILE PRO PHE TRP MET")
# Boolean combinations
active_site = u.select_atoms("(resid 100 102 145 200) and protein")
# Inverse
not_water = u.select_atoms("not (resname WAT HOH)")
Common Analysis Modules
RMSD and RMSF
from MDAnalysis.analysis import rms, align
# Align trajectory to first frame
align.AlignTraj(u, u, select='backbone', in_memory=True).run()
# RMSD
R = rms.RMSD(u, u, select='backbone', groupselections=['name CA'])
R.run()
# R.results.rmsd: shape (n_frames, 3) = [frame, time, RMSD]
# RMSF (per-atom fluctuations)
from MDAnalysis.analysis.rms import RMSF
rmsf = RMSF(u.select_atoms('backbone')).run()
# rmsf.results.rmsf: per-atom RMSF values in Angstroms
Radius of Gyration
rg = []
for ts in u.trajectory:
rg.append(u.select_atoms("protein").radius_of_gyration())
import numpy as np
print(f"Mean Rg: {np.mean(rg):.2f} Å")
Secondary Structure Analysis
from MDAnalysis.analysis.dssp import DSSP
# DSSP secondary structure assignment per frame
dssp = DSSP(u).run()
# dssp.results.dssp: per-residue per-frame secondary structure codes
# H = alpha-helix, E = beta-strand, C = coil
Hydrogen Bonds
from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAnalysis
hbonds = HydrogenBondAnalysis(
u,
donors_sel="protein and name N",
acceptors_sel="protein and name O",
d_h_cutoff=1.2, # donor-H distance (Å)
d_a_cutoff=3.0, # donor-acceptor distance (Å)
d_h_a_angle_cutoff=150 # D-H-A angle (degrees)
)
hbonds.run()
# Count H-bonds per frame
import pandas as pd
df = pd.DataFrame(hbonds.results.hbonds,
columns=['frame', 'donor_ix', 'hydrogen_ix', 'acceptor_ix',
'DA_dist', 'DHA_angle'])
Principal Component Analysis (PCA)
from MDAnalysis.analysis import pca
pca_analysis = pca.PCA(u, select='backbone', align=True).run()
# PC variances
print(pca_analysis.results.variance[:5]) # % variance of first 5 PCs
# Project trajectory onto PCs
projected = pca_analysis.transform(u.select_atoms('backbone'), n_components=3)
# Shape: (n_frames, n_components)
Free Energy Surface (FES)
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
def plot_free_energy_surface(x, y, bins=50, T=300, xlabel="PC1", ylabel="PC2",
output="fes.png"):
"""
Compute 2D free energy surface from two order parameters.
FES = -kT * ln(P(x,y))
"""
kB = 0.0083144621 # kJ/mol/K
kT = kB * T
# 2D histogram
H, xedges, yedges = np.histogram2d(x, y, bins=bins, density=True)
H = H.T
# Free energy
H_safe = np.where(H > 0, H, np.nan)
fes = -kT * np.log(H_safe)
fes -= np.nanmin(fes) # Shift minimum to 0
# Plot
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.contourf(xedges[:-1], yedges[:-1], fes, levels=20, cmap='RdYlBu_r')
plt.colorbar(im, ax=ax, label='Free Energy (kJ/mol)')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
plt.savefig(output, dpi=150, bbox_inches='tight')
return fig
Trajectory Formats Supported
| Format | Extension | Notes |
|---|---|---|
| DCD | .dcd |
CHARMM/NAMD binary; widely used |
| XTC | .xtc |
GROMACS compressed |
| TRR | .trr |
GROMACS full precision |
| NetCDF | .nc |
AMBER format |
| LAMMPS | .lammpstrj |
LAMMPS dump |
| HDF5 | .h5md |
H5MD standard |
| PDB | .pdb |
Multi-model PDB |
MDAnalysis Interoperability
# Convert to numpy
positions = u.atoms.positions # Current frame: shape (N, 3)
# Write to PDB
with mda.Writer("frame_10.pdb", u.atoms.n_atoms) as W:
u.trajectory[10] # Move to frame 10
W.write(u.atoms)
# Write trajectory subset
with mda.Writer("protein_traj.dcd", u.select_atoms("protein").n_atoms) as W:
for ts in u.trajectory:
W.write(u.select_atoms("protein"))
# Convert to MDTraj (for compatibility)
# import mdtraj as md
# traj = md.load("trajectory.dcd", top="topology.pdb")
Performance Tips
- Use
in_memory=Truefor AlignTraj when RAM allows (much faster iteration) - Select minimal atoms before analysis to reduce memory/compute
- Use multiprocessing for independent frame analyses
- Process in chunks for very long trajectories using
start/stop/stepparameters:
# Analyze every 10th frame from frame 100 to 1000
R.run(start=100, stop=1000, step=10)