From 07e8e0e2840387649f0a8d95dbda1980d3e17f28 Mon Sep 17 00:00:00 2001
From: Jeremy Leipzig
Date: Tue, 24 Feb 2026 11:00:51 -0700
Subject: [PATCH] Fix TileDB-Cloud VCF query API syntax

- Correct method: tiledb.cloud.vcf.read() not query_variants()
- Fix parameter: attrs not attributes
- Add namespace parameter for billing account
- Define cfg before it is passed to tiledbvcf.Dataset()
- Add .to_pandas() conversion step and keep its result in df
- Use realistic example with TileDB-Inc dataset URI
---
 scientific-skills/tiledbvcf/SKILL.md | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/scientific-skills/tiledbvcf/SKILL.md b/scientific-skills/tiledbvcf/SKILL.md
index 3b9d432..5a9e652 100644
--- a/scientific-skills/tiledbvcf/SKILL.md
+++ b/scientific-skills/tiledbvcf/SKILL.md
@@ -422,15 +422,32 @@ tiledb.cloud.vcf.ingestion.ingest_vcf_dataset(
 ```python
 # TileDB-Cloud: VCF querying across distributed storage
 import tiledb.cloud.vcf
+import tiledbvcf
 
-# Use specialized VCF query module
-# Queries leverage TileDB-Cloud's distributed architecture
-results = tiledb.cloud.vcf.query.query_variants(
-    dataset_uri="tiledb://my-namespace/large-cohort",
-    regions=["chr1:1000000-2000000"],
-    samples=cohort_samples,
-    attributes=["sample_name", "pos_start", "alleles", "fmt_GT"]
+# Define the dataset URI
+dataset_uri = "tiledb://TileDB-Inc/gvcf-1kg-dragen-v376"
+
+# Build a TileDB config from the logged-in TileDB-Cloud session
+cfg = tiledb.cloud.Config().dict()
+
+# Get all samples from the dataset
+ds = tiledbvcf.Dataset(dataset_uri, tiledb_config=cfg)
+samples = ds.samples()
+
+# Define attributes and ranges to query on
+attrs = ["sample_name", "fmt_GT", "fmt_AD", "fmt_DP"]
+regions = ["chr13:32396898-32397044", "chr13:32398162-32400268"]
+
+# Perform the read, which is executed in a distributed fashion
+table = tiledb.cloud.vcf.read(
+    dataset_uri=dataset_uri,
+    regions=regions,
+    samples=samples,
+    attrs=attrs,
+    namespace="my-namespace",  # specifies which account to charge
 )
+# Convert the returned Arrow table to a pandas DataFrame
+df = table.to_pandas()
 ```
 
 ### Enterprise Features