Initial commit for neuropixels-analysis
This commit is contained in:
339
references/SPIKE_SORTING.md
Normal file
339
references/SPIKE_SORTING.md
Normal file
@@ -0,0 +1,339 @@
|
||||
# Spike Sorting Reference
|
||||
|
||||
Comprehensive guide to spike sorting Neuropixels data.
|
||||
|
||||
## Available Sorters
|
||||
|
||||
| Sorter | GPU Required | Speed | Quality | Best For |
|
||||
|--------|--------------|-------|---------|----------|
|
||||
| **Kilosort4** | Yes (CUDA) | Fast | Excellent | Production use |
|
||||
| **Kilosort3** | Yes (CUDA) | Fast | Very Good | Legacy compatibility |
|
||||
| **Kilosort2.5** | Yes (CUDA) | Fast | Good | Older pipelines |
|
||||
| **SpykingCircus2** | No | Medium | Good | CPU-only systems |
|
||||
| **Mountainsort5** | No | Medium | Good | Small recordings |
|
||||
| **Tridesclous2** | No | Medium | Good | Interactive sorting |
|
||||
|
||||
## Kilosort4 (Recommended)
|
||||
|
||||
### Installation
|
||||
```bash
|
||||
pip install kilosort
|
||||
```
|
||||
|
||||
### Basic Usage
|
||||
```python
|
||||
import spikeinterface.full as si
|
||||
|
||||
# Run Kilosort4
|
||||
sorting = si.run_sorter(
|
||||
'kilosort4',
|
||||
recording,
|
||||
output_folder='ks4_output',
|
||||
verbose=True
|
||||
)
|
||||
|
||||
print(f"Found {len(sorting.unit_ids)} units")
|
||||
```
|
||||
|
||||
### Custom Parameters
|
||||
```python
|
||||
sorting = si.run_sorter(
|
||||
'kilosort4',
|
||||
recording,
|
||||
output_folder='ks4_output',
|
||||
# Detection
|
||||
Th_universal=9, # Spike detection threshold
|
||||
Th_learned=8, # Learned threshold
|
||||
# Templates
|
||||
dmin=15, # Min vertical distance between templates (um)
|
||||
dminx=12, # Min horizontal distance (um)
|
||||
nblocks=5, # Number of non-rigid blocks
|
||||
# Clustering
|
||||
max_channel_distance=None, # Max distance for template channel
|
||||
# Preprocessing and output
|
||||
do_CAR=False, # Skip CAR (done in preprocessing)
|
||||
skip_kilosort_preprocessing=True,
|
||||
save_extra_kwargs=True
|
||||
)
|
||||
```
|
||||
|
||||
### Kilosort4 Full Parameters
|
||||
```python
|
||||
# Get all available parameters
|
||||
params = si.get_default_sorter_params('kilosort4')
|
||||
print(params)
|
||||
|
||||
# Key parameters:
|
||||
ks4_params = {
|
||||
# Detection
|
||||
'Th_universal': 9, # Universal threshold for spike detection
|
||||
'Th_learned': 8, # Threshold for learned templates
|
||||
'spkTh': -6, # Spike threshold during extraction
|
||||
|
||||
# Clustering
|
||||
'dmin': 15, # Min distance between clusters (um)
|
||||
'dminx': 12, # Min horizontal distance (um)
|
||||
'nblocks': 5, # Blocks for non-rigid drift correction
|
||||
|
||||
# Templates
|
||||
'n_templates': 6, # Number of universal templates per group
|
||||
'nt': 61, # Number of time samples in template
|
||||
|
||||
# Performance
|
||||
'batch_size': 60000, # Batch size in samples
|
||||
'nfilt_factor': 8, # Factor for number of filters
|
||||
}
|
||||
```
|
||||
|
||||
## Kilosort3
|
||||
|
||||
### Usage
|
||||
```python
|
||||
sorting = si.run_sorter(
|
||||
'kilosort3',
|
||||
recording,
|
||||
output_folder='ks3_output',
|
||||
# Key parameters
|
||||
detect_threshold=6,
|
||||
projection_threshold=[9, 9],
|
||||
preclust_threshold=8,
|
||||
car=False, # CAR done in preprocessing
|
||||
freq_min=300,
|
||||
)
|
||||
```
|
||||
|
||||
## SpykingCircus2 (CPU-Only)
|
||||
|
||||
### Installation
|
||||
```bash
|
||||
# SpykingCircus2 ships with SpikeInterface itself; only its extra dependencies need installing
pip install hdbscan numba
|
||||
```
|
||||
|
||||
### Usage
|
||||
```python
|
||||
sorting = si.run_sorter(
|
||||
'spykingcircus2',
|
||||
recording,
|
||||
output_folder='sc2_output',
|
||||
# Parameters
|
||||
detect_threshold=5,
|
||||
selection_method='all',
|
||||
)
|
||||
```
|
||||
|
||||
## Mountainsort5 (CPU-Only)
|
||||
|
||||
### Installation
|
||||
```bash
|
||||
pip install mountainsort5
|
||||
```
|
||||
|
||||
### Usage
|
||||
```python
|
||||
sorting = si.run_sorter(
|
||||
'mountainsort5',
|
||||
recording,
|
||||
output_folder='ms5_output',
|
||||
# Parameters
|
||||
detect_threshold=5.0,
|
||||
scheme='2', # '1', '2', or '3'
|
||||
)
|
||||
```
|
||||
|
||||
## Running Multiple Sorters
|
||||
|
||||
### Compare Sorters
|
||||
```python
|
||||
# Run multiple sorters
|
||||
sorting_ks4 = si.run_sorter('kilosort4', recording, output_folder='ks4/')
|
||||
sorting_sc2 = si.run_sorter('spykingcircus2', recording, output_folder='sc2/')
|
||||
sorting_ms5 = si.run_sorter('mountainsort5', recording, output_folder='ms5/')
|
||||
|
||||
# Compare results
|
||||
comparison = si.compare_multiple_sorters(
|
||||
[sorting_ks4, sorting_sc2, sorting_ms5],
|
||||
name_list=['KS4', 'SC2', 'MS5']
|
||||
)
|
||||
|
||||
# Get agreement scores
|
||||
agreement = comparison.get_agreement_sorting()
|
||||
```
|
||||
|
||||
### Ensemble Sorting
|
||||
```python
|
||||
# Create consensus sorting
|
||||
# `comparison` is the MultiSortingComparison returned by si.compare_multiple_sorters(...)
sorting_ensemble = comparison.get_agreement_sorting(
    minimum_agreement_count=2  # Unit must be found by at least 2 sorters
)
|
||||
```
|
||||
|
||||
## Sorting in Docker/Singularity
|
||||
|
||||
### Using Docker
|
||||
```python
|
||||
sorting = si.run_sorter(
|
||||
'kilosort3',
|
||||
recording,
|
||||
output_folder='ks3_docker/',
|
||||
docker_image='spikeinterface/kilosort3-compiled-base:latest',
|
||||
verbose=True
|
||||
)
|
||||
```
|
||||
|
||||
### Using Singularity
|
||||
```python
|
||||
sorting = si.run_sorter(
|
||||
'kilosort3',
|
||||
recording,
|
||||
output_folder='ks3_singularity/',
|
||||
singularity_image='/path/to/kilosort3.sif',
|
||||
verbose=True
|
||||
)
|
||||
```
|
||||
|
||||
## Long Recording Strategy
|
||||
|
||||
### Concatenate Recordings
|
||||
```python
|
||||
# Multiple recording files
|
||||
recordings = [
|
||||
si.read_spikeglx(f'/path/to/recording_{i}', stream_id='imec0.ap')
|
||||
for i in range(3)
|
||||
]
|
||||
|
||||
# Concatenate
|
||||
recording_concat = si.concatenate_recordings(recordings)
|
||||
|
||||
# Sort
|
||||
sorting = si.run_sorter('kilosort4', recording_concat, output_folder='ks4/')
|
||||
|
||||
# Split back by original recording
|
||||
sortings_split = si.split_sorting(sorting, recording_concat)
|
||||
```
|
||||
|
||||
### Sort by Segment
|
||||
```python
|
||||
# For very long recordings, sort segments separately
|
||||
from pathlib import Path
|
||||
|
||||
segments_output = Path('sorting_segments')
|
||||
sortings = []
|
||||
|
||||
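fs = recording.get_sampling_frequency()
boundaries_s = [0, 3600, 7200, 10800]  # Segment edges in seconds

for i, (t_start, t_stop) in enumerate(zip(boundaries_s[:-1], boundaries_s[1:])):
    # Slice the recording by frame index; frames = seconds * sampling rate
    segment = recording.frame_slice(
        start_frame=int(t_start * fs),
        end_frame=int(t_stop * fs)
    )
    sorting_seg = si.run_sorter(
        'kilosort4',
        segment,
        output_folder=segments_output / f'segment_{i}'
    )
    sortings.append(sorting_seg)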
|
||||
```
|
||||
|
||||
## Post-Sorting Curation
|
||||
|
||||
### Manual Curation with Phy
|
||||
```python
|
||||
# Export to Phy format
|
||||
analyzer = si.create_sorting_analyzer(sorting, recording)
|
||||
analyzer.compute(['random_spikes', 'waveforms', 'templates'])
|
||||
si.export_to_phy(analyzer, output_folder='phy_export/')
|
||||
|
||||
# Open Phy
|
||||
# Run in terminal: phy template-gui phy_export/params.py
|
||||
```
|
||||
|
||||
### Load Phy Curation
|
||||
```python
|
||||
# After manual curation in Phy
|
||||
sorting_curated = si.read_phy('phy_export/')
|
||||
|
||||
# Or apply Phy labels while loading, e.g. drop clusters marked as noise
sorting_curated = si.read_phy('phy_export/', exclude_cluster_groups=['noise'])
|
||||
```
|
||||
|
||||
### Automatic Curation
|
||||
```python
|
||||
# Remove units below quality threshold
|
||||
analyzer = si.create_sorting_analyzer(sorting, recording)
|
||||
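# Quality metrics depend on these extensions, so compute them first
analyzer.compute(['random_spikes', 'waveforms', 'templates', 'noise_levels', 'quality_metrics'])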
|
||||
|
||||
qm = analyzer.get_extension('quality_metrics').get_data()
|
||||
|
||||
# Define quality criteria
|
||||
query = "(snr > 5) & (isi_violations_ratio < 0.01) & (presence_ratio > 0.9)"
|
||||
good_unit_ids = qm.query(query).index.tolist()
|
||||
|
||||
sorting_clean = sorting.select_units(good_unit_ids)
|
||||
print(f"Kept {len(good_unit_ids)}/{len(sorting.unit_ids)} units")
|
||||
```
|
||||
|
||||
## Sorting Metrics
|
||||
|
||||
### Check Sorter Output
|
||||
```python
|
||||
# Basic stats
|
||||
print(f"Units found: {len(sorting.unit_ids)}")
|
||||
print(f"Total spikes: {sorting.count_total_num_spikes()}")
|
||||
|
||||
# Per-unit spike counts
|
||||
for unit_id in sorting.unit_ids[:10]:
|
||||
n_spikes = len(sorting.get_unit_spike_train(unit_id))
|
||||
print(f"Unit {unit_id}: {n_spikes} spikes")
|
||||
```
|
||||
|
||||
### Firing Rates
|
||||
```python
|
||||
# Compute firing rates
|
||||
duration = recording.get_total_duration()
|
||||
for unit_id in sorting.unit_ids:
|
||||
n_spikes = len(sorting.get_unit_spike_train(unit_id))
|
||||
fr = n_spikes / duration
|
||||
print(f"Unit {unit_id}: {fr:.2f} Hz")
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
**Out of GPU Memory**
|
||||
```python
|
||||
# Reduce batch size
|
||||
sorting = si.run_sorter(
|
||||
'kilosort4',
|
||||
recording,
|
||||
output_folder='ks4/',
|
||||
batch_size=30000 # Smaller batch
|
||||
)
|
||||
```
|
||||
|
||||
**Too Few Units Found**
|
||||
```python
|
||||
# Lower detection threshold
|
||||
sorting = si.run_sorter(
|
||||
'kilosort4',
|
||||
recording,
|
||||
output_folder='ks4/',
|
||||
Th_universal=7, # Lower from default 9
|
||||
Th_learned=6
|
||||
)
|
||||
```
|
||||
|
||||
**Too Many Units (Over-splitting)**
|
||||
```python
|
||||
# Increase minimum distance between templates
|
||||
sorting = si.run_sorter(
|
||||
'kilosort4',
|
||||
recording,
|
||||
output_folder='ks4/',
|
||||
dmin=20, # Increase from 15
|
||||
dminx=16 # Increase from 12
|
||||
)
|
||||
```
|
||||
|
||||
**Check GPU Availability**
|
||||
```python
|
||||
import torch
|
||||
print(f"CUDA available: {torch.cuda.is_available()}")
|
||||
print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'none detected'}")
|
||||
```
|
||||
Reference in New Issue
Block a user