Remove Custom Field Selection and Population-Specific Exports sections

- Remove detailed custom TSV field configuration section
- Remove population-based export workflow section
- Simplify export guide to focus on core functionality
This commit is contained in:
Jeremy Leipzig
2026-02-24 11:11:53 -07:00
parent b4b8572244
commit 70a34bd652

View File

@@ -62,72 +62,6 @@ ds.export_tsv(
)
```
### Custom Field Selection
```python
# Columns to include in the TSV, grouped by where each value comes from
site_fields = [
    "CHR",     # Chromosome
    "POS",     # Position
    "ID",      # Variant ID
    "REF",     # Reference allele
    "ALT",     # Alternative allele
    "QUAL",    # Quality score
    "FILTER",  # Filter status
]
info_fields = ["I:AF", "I:AC", "I:AN"]    # INFO: allele frequency, count, number
sample_fields = ["S:GT", "S:DP", "S:GQ"]  # Sample: genotype, depth, genotype quality
custom_fields = site_fields + info_fields + sample_fields

ds.export_tsv(
    uri="detailed_variants.tsv",
    regions=["chr1:1000000-2000000"],
    samples=["SAMPLE_001", "SAMPLE_002"],
    tsv_fields=custom_fields,
)
```
### Population-Specific Exports
```python
def export_population_data(ds, regions, population_file, output_prefix):
    """Export one TSV file per population listed in a sample-assignment CSV.

    Args:
        ds: Object providing ``export_tsv(uri=, regions=, samples=, tsv_fields=)``.
        regions: Regions passed unchanged to every ``export_tsv`` call.
        population_file: Path to a CSV with ``sample_id`` and ``population``
            columns.
        output_prefix: Prefix for output files; each population is written to
            ``{output_prefix}_{population}.tsv``.

    Rows whose ``population`` value is missing are skipped, so no
    ``{output_prefix}_nan.tsv`` file is produced for unassigned samples.
    """
    import pandas as pd

    # Read population assignments
    pop_df = pd.read_csv(population_file)

    # Group sample IDs by population. sort=False keeps populations in order of
    # first appearance in the file; groupby drops rows with a missing
    # population by default.
    samples_by_population = pop_df.groupby("population", sort=False)["sample_id"]

    # Export each population
    for pop_name, sample_ids in samples_by_population:
        samples = sample_ids.tolist()
        output_file = f"{output_prefix}_{pop_name}.tsv"
        print(f"Exporting {pop_name}: {len(samples)} samples")
        ds.export_tsv(
            uri=output_file,
            regions=regions,
            samples=samples,
            # NOTE(review): the Custom Field Selection example requests allele
            # frequency as "I:AF" (INFO); confirm "S:AF" is intended here.
            tsv_fields=["CHR", "POS", "REF", "ALT", "S:GT", "S:AF"],
        )
        print(f"Exported {pop_name} data to {output_file}")
# Example call: write one TSV per population for the chr1 window below
target_regions = ["chr1:1000000-2000000"]
export_population_data(
    ds,
    regions=target_regions,
    population_file="populations.csv",
    output_prefix="population_variants",
)
```
## Pandas DataFrame Export