From 70a34bd6523bb4bbdb6fdc42aceb983e27d17725 Mon Sep 17 00:00:00 2001 From: Jeremy Leipzig Date: Tue, 24 Feb 2026 11:11:53 -0700 Subject: [PATCH] Remove Custom Field Selection and Population-Specific Exports sections - Remove detailed custom TSV field configuration section - Remove population-based export workflow section - Simplify export guide to focus on core functionality --- .../tiledbvcf/references/export.md | 66 ------------------- 1 file changed, 66 deletions(-) diff --git a/scientific-skills/tiledbvcf/references/export.md b/scientific-skills/tiledbvcf/references/export.md index db3b8cf..a52f641 100644 --- a/scientific-skills/tiledbvcf/references/export.md +++ b/scientific-skills/tiledbvcf/references/export.md @@ -62,72 +62,6 @@ ds.export_tsv( ) ``` -### Custom Field Selection -```python -# Define custom TSV fields -custom_fields = [ - "CHR", # Chromosome - "POS", # Position - "ID", # Variant ID - "REF", # Reference allele - "ALT", # Alternative allele - "QUAL", # Quality score - "FILTER", # Filter status - "I:AF", # INFO: Allele frequency - "I:AC", # INFO: Allele count - "I:AN", # INFO: Allele number - "S:GT", # Sample: Genotype - "S:DP", # Sample: Depth - "S:GQ" # Sample: Genotype quality -] - -ds.export_tsv( - uri="detailed_variants.tsv", - regions=["chr1:1000000-2000000"], - samples=["SAMPLE_001", "SAMPLE_002"], - tsv_fields=custom_fields -) -``` - -### Population-Specific Exports -```python -def export_population_data(ds, regions, population_file, output_prefix): - """Export data for different populations separately""" - import pandas as pd - - # Read population assignments - pop_df = pd.read_csv(population_file) - - populations = {} - for _, row in pop_df.iterrows(): - pop = row['population'] - if pop not in populations: - populations[pop] = [] - populations[pop].append(row['sample_id']) - - # Export each population - for pop_name, samples in populations.items(): - output_file = f"{output_prefix}_{pop_name}.tsv" - - print(f"Exporting {pop_name}: {len(samples)} samples") - - ds.export_tsv( - uri=output_file, - regions=regions, - samples=samples, - tsv_fields=["CHR", "POS", "REF", "ALT", "S:GT", "S:AF"] - ) - - print(f"Exported {pop_name} data to {output_file}") - -# Usage -export_population_data( - ds, - regions=["chr1:1000000-2000000"], - population_file="populations.csv", - output_prefix="population_variants" -) -``` ## Pandas DataFrame Export