diff --git a/scientific-skills/tiledbvcf/references/export.md b/scientific-skills/tiledbvcf/references/export.md index db3b8cf..a52f641 100644 --- a/scientific-skills/tiledbvcf/references/export.md +++ b/scientific-skills/tiledbvcf/references/export.md @@ -62,72 +62,6 @@ ds.export_tsv( ) ``` -### Custom Field Selection -```python -# Define custom TSV fields -custom_fields = [ - "CHR", # Chromosome - "POS", # Position - "ID", # Variant ID - "REF", # Reference allele - "ALT", # Alternative allele - "QUAL", # Quality score - "FILTER", # Filter status - "I:AF", # INFO: Allele frequency - "I:AC", # INFO: Allele count - "I:AN", # INFO: Allele number - "S:GT", # Sample: Genotype - "S:DP", # Sample: Depth - "S:GQ" # Sample: Genotype quality -] - -ds.export_tsv( - uri="detailed_variants.tsv", - regions=["chr1:1000000-2000000"], - samples=["SAMPLE_001", "SAMPLE_002"], - tsv_fields=custom_fields -) -``` - -### Population-Specific Exports -```python -def export_population_data(ds, regions, population_file, output_prefix): - """Export data for different populations separately""" - import pandas as pd - - # Read population assignments - pop_df = pd.read_csv(population_file) - - populations = {} - for _, row in pop_df.iterrows(): - pop = row['population'] - if pop not in populations: - populations[pop] = [] - populations[pop].append(row['sample_id']) - - # Export each population - for pop_name, samples in populations.items(): - output_file = f"{output_prefix}_{pop_name}.tsv" - - print(f"Exporting {pop_name}: {len(samples)} samples") - - ds.export_tsv( - uri=output_file, - regions=regions, - samples=samples, - tsv_fields=["CHR", "POS", "REF", "ALT", "S:GT", "S:AF"] - ) - - print(f"Exported {pop_name} data to {output_file}") - -# Usage -export_population_data( - ds, - regions=["chr1:1000000-2000000"], - population_file="populations.csv", - output_prefix="population_variants" -) -``` ## Pandas DataFrame Export