Post-Processing & Analysis Reference
Comprehensive guide to quality metrics, visualization, and analysis of sorted Neuropixels data.
Sorting Analyzer
The SortingAnalyzer is the central object for post-processing.
Create Analyzer
import spikeinterface.full as si
# Create analyzer
analyzer = si.create_sorting_analyzer(
sorting,
recording,
sparse=True, # Use sparse representation
format='binary_folder', # Storage format
folder='analyzer_output' # Save location
)
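With sparse=True the analyzer estimates channel sparsity automatically. You can also precompute sparsity yourself and pass it in, e.g. with a fixed radius around each unit's peak channel (a sketch; estimate_sparsity and the sparsity argument exist in recent SpikeInterface versions, but check your version's signature):
# Precompute sparsity, then hand it to the analyzer
sparsity = si.estimate_sparsity(
    sorting=sorting,
    recording=recording,
    method='radius',
    radius_um=100.0
)
analyzer = si.create_sorting_analyzer(
    sorting, recording,
    sparsity=sparsity,               # precomputed sparsity instead of sparse=True
    format='binary_folder',
    folder='analyzer_output_radius'  # hypothetical folder name
)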
Compute Extensions
# Compute all standard extensions
analyzer.compute('random_spikes') # Random spike selection
analyzer.compute('waveforms') # Extract waveforms
analyzer.compute('templates') # Compute templates
analyzer.compute('noise_levels') # Noise estimation
analyzer.compute('principal_components') # PCA
analyzer.compute('spike_amplitudes') # Amplitude per spike
analyzer.compute('correlograms') # Auto/cross correlograms
analyzer.compute('unit_locations') # Unit locations
analyzer.compute('spike_locations') # Per-spike locations
analyzer.compute('template_similarity') # Template similarity matrix
analyzer.compute('quality_metrics') # Quality metrics
# Or compute multiple at once
analyzer.compute([
'random_spikes', 'waveforms', 'templates', 'noise_levels',
'principal_components', 'spike_amplitudes', 'correlograms',
'unit_locations', 'quality_metrics'
])
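compute() also accepts a dict mapping extension names to their parameters, which is convenient when the defaults need adjusting (a minimal sketch of the dict form):
analyzer.compute({
    'random_spikes': dict(max_spikes_per_unit=500),
    'waveforms': dict(ms_before=1.5, ms_after=2.5),
    'templates': dict(operators=['average', 'std']),
})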
Save and Load
# Save
analyzer.save_as(folder='analyzer_saved', format='binary_folder')
# Load
analyzer = si.load_sorting_analyzer('analyzer_saved')
Quality Metrics
Compute Metrics
analyzer.compute('quality_metrics')
qm = analyzer.get_extension('quality_metrics').get_data()
print(qm)
Available Metrics
| Metric | Description | Good Values |
|---|---|---|
| `snr` | Signal-to-noise ratio | > 5 |
| `isi_violations_ratio` | ISI violation ratio | < 0.01 (1%) |
| `isi_violations_count` | ISI violation count | Low |
| `presence_ratio` | Fraction of recording with spikes | > 0.9 |
| `firing_rate` | Spikes per second | 0.1-50 Hz |
| `amplitude_cutoff` | Estimated missed spikes | < 0.1 |
| `amplitude_median` | Median spike amplitude | - |
| `amplitude_cv` | Coefficient of variation | < 0.5 |
| `drift_ptp` | Peak-to-peak drift (um) | < 40 |
| `drift_std` | Standard deviation of drift | < 10 |
| `drift_mad` | Median absolute deviation of drift | < 10 |
| `sliding_rp_violation` | Sliding refractory period | < 0.05 |
| `sync_spike_2` | Synchrony with other units | < 0.5 |
| `isolation_distance` | Mahalanobis distance | > 20 |
| `l_ratio` | L-ratio (isolation) | < 0.1 |
| `d_prime` | Discriminability | > 5 |
| `nn_hit_rate` | Nearest neighbor hit rate | > 0.9 |
| `nn_miss_rate` | Nearest neighbor miss rate | < 0.1 |
| `silhouette_score` | Cluster silhouette | > 0.5 |
Compute Specific Metrics
analyzer.compute(
'quality_metrics',
metric_names=['snr', 'isi_violations_ratio', 'presence_ratio', 'firing_rate']
)
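To see which metric names your installed version supports, the quality-metrics module exposes list helpers (assumed here to be re-exported by spikeinterface.full; otherwise import them from spikeinterface.qualitymetrics):
print(si.get_quality_metric_list())      # spike-train based metrics
print(si.get_quality_pca_metric_list())  # PCA-based metrics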
Custom Quality Thresholds
qm = analyzer.get_extension('quality_metrics').get_data()
# Define quality criteria as (operator, threshold) pairs
quality_criteria = {
    'snr': ('>', 5),
    'isi_violations_ratio': ('<', 0.01),
    'presence_ratio': ('>', 0.9),
    'firing_rate': ('>', 0.1),
    'amplitude_cutoff': ('<', 0.1),
}
# Build a pandas query from the criteria and keep units that pass all of them
query = " & ".join(f"({m} {op} {v})" for m, (op, v) in quality_criteria.items())
good_units = qm.query(query).index.tolist()
print(f"Good units: {len(good_units)}/{len(qm)}")
Waveforms & Templates
Extract Waveforms
# The number of spikes used per unit is set on the 'random_spikes' extension,
# not on 'waveforms'
analyzer.compute('random_spikes', max_spikes_per_unit=500)
analyzer.compute('waveforms', ms_before=1.5, ms_after=2.5)
# Get waveforms for a unit
waveforms = analyzer.get_extension('waveforms').get_waveforms_one_unit(unit_id=0)
print(f"Shape: {waveforms.shape}")  # (n_spikes, n_samples, n_channels)
Compute Templates
analyzer.compute('templates', operators=['average', 'std', 'median'])
# Get template
templates_ext = analyzer.get_extension('templates')
template = templates_ext.get_unit_template(unit_id=0, operator='average')
Template Similarity
analyzer.compute('template_similarity')
sim = analyzer.get_extension('template_similarity').get_data()
# Matrix of cosine similarities between templates
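A common use of this matrix is flagging potential duplicate units, i.e. pairs with near-identical templates (a sketch; the 0.9 cutoff is a heuristic, not a library default):
import numpy as np
unit_ids = analyzer.unit_ids
# Upper triangle only, so each pair is reported once
i_idx, j_idx = np.where(np.triu(sim > 0.9, k=1))
for i, j in zip(i_idx, j_idx):
    print(f"Units {unit_ids[i]} / {unit_ids[j]}: similarity {sim[i, j]:.2f}")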
Unit Locations
Compute Locations
analyzer.compute('unit_locations', method='monopolar_triangulation')
locations = analyzer.get_extension('unit_locations').get_data()
print(locations) # x, y coordinates per unit
Spike Locations
analyzer.compute('spike_locations', method='center_of_mass')
spike_locs = analyzer.get_extension('spike_locations').get_data()
Location Methods
- 'center_of_mass' - fast, less accurate
- 'monopolar_triangulation' - more accurate, slower
- 'grid_convolution' - good balance (see the comparison sketch below)
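To compare the three methods on your own data (a sketch; recomputing the extension simply overwrites the previous result):
for method in ('center_of_mass', 'monopolar_triangulation', 'grid_convolution'):
    analyzer.compute('unit_locations', method=method)
    locations = analyzer.get_extension('unit_locations').get_data()
    print(method, locations[:3])  # estimated positions of the first three units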
Correlograms
Auto-correlograms
analyzer.compute('correlograms', window_ms=50, bin_ms=1)
correlograms, bins = analyzer.get_extension('correlograms').get_data()
# correlograms shape: (n_units, n_units, n_bins)
# Auto-correlogram for unit i: correlograms[i, i, :]
# Cross-correlogram units i,j: correlograms[i, j, :]
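To plot a single auto-correlogram straight from these arrays (a sketch; assumes bins holds the bin edges in ms, as get_data returns them):
import matplotlib.pyplot as plt
unit_index = 0
plt.stairs(correlograms[unit_index, unit_index, :], bins)
plt.xlabel('Lag (ms)')
plt.ylabel('Count')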
Visualization
Probe Map
si.plot_probe_map(recording, with_channel_ids=True)
Unit Templates
# All units
si.plot_unit_templates(analyzer)
# Specific units
si.plot_unit_templates(analyzer, unit_ids=[0, 1, 2])
Waveforms
# Plot waveforms with template
si.plot_unit_waveforms(analyzer, unit_ids=[0])
# Waveform density map
si.plot_unit_waveforms_density_map(analyzer, unit_ids=[0])
Raster Plot
si.plot_rasters(sorting, time_range=(0, 10)) # First 10 seconds
Amplitudes
analyzer.compute('spike_amplitudes')
si.plot_amplitudes(analyzer)
# Distribution
si.plot_all_amplitudes_distributions(analyzer)
Correlograms
# Auto-correlograms
si.plot_autocorrelograms(analyzer, unit_ids=[0, 1, 2])
# Cross-correlograms
si.plot_crosscorrelograms(analyzer, unit_ids=[0, 1])
Quality Metrics
# Summary plot
si.plot_quality_metrics(analyzer)
# Specific metric distribution
import matplotlib.pyplot as plt
qm = analyzer.get_extension('quality_metrics').get_data()
plt.hist(qm['snr'], bins=50)
plt.xlabel('SNR')
plt.ylabel('Count')
Unit Locations on Probe
si.plot_unit_locations(analyzer)
Drift Map
# Needs per-spike locations computed on the analyzer
analyzer.compute('spike_locations')
si.plot_drift_raster_map(sorting_analyzer=analyzer)
Summary Plot
# Comprehensive unit summary
si.plot_unit_summary(analyzer, unit_id=0)
LFP Analysis
Load LFP Data
lfp = si.read_spikeglx('/path/to/data', stream_id='imec0.lf')
print(f"LFP: {lfp.get_sampling_frequency()} Hz")
Basic LFP Processing
# Downsample if needed
lfp_ds = si.resample(lfp, resample_rate=1000)
# Common average reference
lfp_car = si.common_reference(lfp_ds, reference='global', operator='median')
Extract LFP Traces
import numpy as np
# Get traces (channels x samples)
traces = lfp.get_traces(start_frame=0, end_frame=30000)
# Specific channels
traces = lfp.get_traces(channel_ids=[0, 1, 2])
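To get traces in microvolts instead of raw integer ADC values, pass return_scaled=True (supported by get_traces when the file provides channel gains):
traces_uv = lfp.get_traces(start_frame=0, end_frame=30000, return_scaled=True)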
Spectral Analysis
from scipy import signal
import matplotlib.pyplot as plt
# Get single channel
trace = lfp.get_traces(channel_ids=[0]).flatten()
fs = lfp.get_sampling_frequency()
# Power spectrum
freqs, psd = signal.welch(trace, fs, nperseg=4096)
plt.semilogy(freqs, psd)
plt.xlabel('Frequency (Hz)')
plt.ylabel('Power')
plt.xlim(0, 100)
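The same PSD can be integrated over a band of interest to estimate band power; theta (4-8 Hz) here is an illustrative choice:
import numpy as np
band = (freqs >= 4) & (freqs <= 8)
theta_power = np.trapz(psd[band], freqs[band])
print(f"Theta (4-8 Hz) power: {theta_power:.3g}")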
Spectrogram
f, t, Sxx = signal.spectrogram(trace, fs, nperseg=2048, noverlap=1024)
plt.pcolormesh(t, f, 10*np.log10(Sxx), shading='gouraud')
plt.ylabel('Frequency (Hz)')
plt.xlabel('Time (s)')
plt.ylim(0, 100)
plt.colorbar(label='Power (dB)')
Export Formats
Export to Phy
si.export_to_phy(
analyzer,
output_folder='phy_export',
compute_pc_features=True,
compute_amplitudes=True,
copy_binary=True
)
# Then: phy template-gui phy_export/params.py
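After manual curation in Phy, the result can be read back into SpikeInterface, dropping units you labeled as noise (read_phy is the standard extractor for Phy folders):
sorting_curated = si.read_phy('phy_export', exclude_cluster_groups=['noise', 'mua'])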
Export to NWB
SpikeInterface does not ship an NWB writer itself; the companion package neuroconv provides one (a sketch assuming a recent neuroconv; check its docs for the exact signatures):
from neuroconv.tools.spikeinterface import write_recording, write_sorting
metadata = dict(
    NWBFile=dict(
        session_description='Neuropixels recording',
        experimenter=['Name'],
        lab='Lab name',
        institution='Institution'
    )
)
# Write the recording first, then append the sorting to the same file
write_recording(recording=recording, nwbfile_path='output.nwb', metadata=metadata)
write_sorting(sorting=sorting, nwbfile_path='output.nwb', overwrite=False)
Export Report
si.export_report(
    analyzer,
    output_folder='report',
    remove_if_exists=True,
    format='png'  # image format for the figures; the report is a folder of figures and CSV tables
)
Complete Analysis Pipeline
import spikeinterface.full as si

def analyze_sorting(recording, sorting, output_dir):
    """Complete post-processing pipeline."""
    # Create analyzer (format must be 'binary_folder' when saving to a folder)
    analyzer = si.create_sorting_analyzer(
        sorting, recording,
        sparse=True,
        format='binary_folder',
        folder=f'{output_dir}/analyzer'
    )

    # Compute all extensions
    print("Computing extensions...")
    analyzer.compute(['random_spikes', 'waveforms', 'templates', 'noise_levels'])
    analyzer.compute(['principal_components', 'spike_amplitudes'])
    analyzer.compute(['correlograms', 'unit_locations', 'template_similarity'])
    analyzer.compute('quality_metrics')

    # Get quality metrics
    qm = analyzer.get_extension('quality_metrics').get_data()

    # Filter good units
    good_units = qm.query(
        "(snr > 5) & (isi_violations_ratio < 0.01) & (presence_ratio > 0.9)"
    ).index.tolist()
    print(f"Quality filtering: {len(good_units)}/{len(qm)} units passed")

    # Export
    si.export_to_phy(analyzer, f'{output_dir}/phy')
    si.export_report(analyzer, f'{output_dir}/report')

    # Save metrics
    qm.to_csv(f'{output_dir}/quality_metrics.csv')

    return analyzer, qm, good_units

# Usage
analyzer, qm, good_units = analyze_sorting(recording, sorting, 'output/')
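Downstream analyses can then be restricted to the units that passed, via select_units (available on both the sorting and the analyzer):
sorting_good = sorting.select_units(good_units)
analyzer_good = analyzer.select_units(good_units)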