mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
Remove Custom Field Selection and Population-Specific Exports sections
- Remove detailed custom TSV field configuration section
- Remove population-based export workflow section
- Simplify export guide to focus on core functionality
This commit is contained in:
@@ -62,72 +62,6 @@ ds.export_tsv(
|
|||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Custom Field Selection
|
|
||||||
```python
|
|
||||||
# Define custom TSV fields, grouped by where each value comes from.
custom_fields = [
    # Site-level columns: chromosome, position, variant ID, reference allele,
    # alternative allele, quality score, filter status
    "CHR", "POS", "ID", "REF", "ALT", "QUAL", "FILTER",
    # INFO fields: allele frequency, allele count, allele number
    "I:AF", "I:AC", "I:AN",
    # Sample fields: genotype, depth, genotype quality
    "S:GT", "S:DP", "S:GQ",
]

# Export the selected fields for two samples over a single region.
ds.export_tsv(
    tsv_fields=custom_fields,
    samples=["SAMPLE_001", "SAMPLE_002"],
    regions=["chr1:1000000-2000000"],
    uri="detailed_variants.tsv",
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Population-Specific Exports
|
|
||||||
```python
|
|
||||||
def export_population_data(ds, regions, population_file, output_prefix,
                           tsv_fields=None):
    """Export data for different populations separately.

    Reads a CSV that assigns samples to populations and calls
    ``ds.export_tsv`` once per population, writing each export to
    ``{output_prefix}_{population}.tsv``.

    Args:
        ds: Dataset object exposing ``export_tsv(uri=..., regions=...,
            samples=..., tsv_fields=...)``.
        regions: Regions to export; passed through to ``ds.export_tsv``
            unchanged.
        population_file: Path (or file-like object) of a CSV with
            ``population`` and ``sample_id`` columns.
        output_prefix: Prefix of every output file name.
        tsv_fields: Optional list of TSV field names to export. Defaults to
            ``["CHR", "POS", "REF", "ALT", "S:GT", "S:AF"]``.

    Raises:
        KeyError: If the CSV lacks the ``population`` or ``sample_id`` column.
    """
    import pandas as pd

    if tsv_fields is None:
        # NOTE(review): "S:AF" requests AF as a per-sample field, whereas the
        # other examples in this guide use "I:AF" — confirm which is intended.
        tsv_fields = ["CHR", "POS", "REF", "ALT", "S:GT", "S:AF"]

    # Read everything as text: with dtype inference an ID such as "001" would
    # be parsed as the integer 1 and no longer match the sample name.
    pop_df = pd.read_csv(population_file, dtype=str)

    required_columns = ["population", "sample_id"]
    missing = [col for col in required_columns if col not in pop_df.columns]
    if missing:
        raise KeyError(
            f"{population_file} is missing required column(s): "
            f"{', '.join(missing)}"
        )

    # Rows without a population or a sample ID cannot be assigned to an
    # export; skip them instead of producing a bogus "nan" population.
    pop_df = pop_df.dropna(subset=required_columns)

    # sort=False keeps populations in order of first appearance in the file.
    grouped = pop_df.groupby("population", sort=False)["sample_id"]

    # Export each population
    for pop_name, sample_ids in grouped:
        samples = sample_ids.tolist()
        output_file = f"{output_prefix}_{pop_name}.tsv"

        print(f"Exporting {pop_name}: {len(samples)} samples")

        ds.export_tsv(
            uri=output_file,
            regions=regions,
            samples=samples,
            tsv_fields=list(tsv_fields),
        )

        print(f"Exported {pop_name} data to {output_file}")
|
|
||||||
|
|
||||||
# Usage: export chr1:1000000-2000000 for the populations in populations.csv
export_population_data(
    ds=ds,
    output_prefix="population_variants",
    population_file="populations.csv",
    regions=["chr1:1000000-2000000"],
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Pandas DataFrame Export
|
## Pandas DataFrame Export
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user