mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
Fix TileDB-Cloud VCF query API syntax
- Correct method: `tiledb.cloud.vcf.read()`, not `query_variants()`
- Fix parameter: `attrs`, not `attributes`
- Add `namespace` parameter for billing account
- Add `.to_pandas()` conversion step
- Use realistic example with TileDB-Inc dataset URI
This commit is contained in:
@@ -422,15 +422,28 @@ tiledb.cloud.vcf.ingestion.ingest_vcf_dataset(
|
||||
```python
|
||||
# TileDB-Cloud: VCF querying across distributed storage
|
||||
import tiledb.cloud.vcf
|
||||
import tiledbvcf
|
||||
|
||||
# Use specialized VCF query module
|
||||
# Queries leverage TileDB-Cloud's distributed architecture
|
||||
results = tiledb.cloud.vcf.query.query_variants(
|
||||
dataset_uri="tiledb://my-namespace/large-cohort",
|
||||
regions=["chr1:1000000-2000000"],
|
||||
samples=cohort_samples,
|
||||
attributes=["sample_name", "pos_start", "alleles", "fmt_GT"]
|
||||
# Define the dataset URI
|
||||
dataset_uri = "tiledb://TileDB-Inc/gvcf-1kg-dragen-v376"
|
||||
|
||||
# Get all samples from the dataset
|
||||
ds = tiledbvcf.Dataset(dataset_uri, tiledb_config=cfg)
|
||||
samples = ds.samples()
|
||||
|
||||
# Define attributes and ranges to query on
|
||||
attrs = ["sample_name", "fmt_GT", "fmt_AD", "fmt_DP"]
|
||||
regions = ["chr13:32396898-32397044", "chr13:32398162-32400268"]
|
||||
|
||||
# Perform the read, which is executed in a distributed fashion
|
||||
df = tiledb.cloud.vcf.read(
|
||||
dataset_uri=dataset_uri,
|
||||
regions=regions,
|
||||
samples=samples,
|
||||
attrs=attrs,
|
||||
namespace="my-namespace", # specifies which account to charge
|
||||
)
|
||||
df.to_pandas()
|
||||
```
|
||||
|
||||
### Enterprise Features
|
||||
|
||||
Reference in New Issue
Block a user