mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
Fix TileDB-Cloud VCF query API syntax
- Correct method: `tiledb.cloud.vcf.read()`, not `query_variants()`
- Fix parameter: `attrs`, not `attributes`
- Add `namespace` parameter for billing account
- Add `.to_pandas()` conversion step
- Use realistic example with TileDB-Inc dataset URI
This commit is contained in:
@@ -422,15 +422,28 @@ tiledb.cloud.vcf.ingestion.ingest_vcf_dataset(
|
|||||||
```python
|
```python
|
||||||
# TileDB-Cloud: VCF querying across distributed storage
|
# TileDB-Cloud: VCF querying across distributed storage
|
||||||
import tiledb.cloud.vcf
|
import tiledb.cloud.vcf
|
||||||
|
import tiledbvcf
|
||||||
|
|
||||||
# Use specialized VCF query module
|
# Define the dataset URI
|
||||||
# Queries leverage TileDB-Cloud's distributed architecture
|
dataset_uri = "tiledb://TileDB-Inc/gvcf-1kg-dragen-v376"
|
||||||
results = tiledb.cloud.vcf.query.query_variants(
|
|
||||||
dataset_uri="tiledb://my-namespace/large-cohort",
|
# Get all samples from the dataset
|
||||||
regions=["chr1:1000000-2000000"],
|
ds = tiledbvcf.Dataset(dataset_uri, tiledb_config=cfg)
|
||||||
samples=cohort_samples,
|
samples = ds.samples()
|
||||||
attributes=["sample_name", "pos_start", "alleles", "fmt_GT"]
|
|
||||||
|
# Define attributes and ranges to query on
|
||||||
|
attrs = ["sample_name", "fmt_GT", "fmt_AD", "fmt_DP"]
|
||||||
|
regions = ["chr13:32396898-32397044", "chr13:32398162-32400268"]
|
||||||
|
|
||||||
|
# Perform the read, which is executed in a distributed fashion
|
||||||
|
df = tiledb.cloud.vcf.read(
|
||||||
|
dataset_uri=dataset_uri,
|
||||||
|
regions=regions,
|
||||||
|
samples=samples,
|
||||||
|
attrs=attrs,
|
||||||
|
namespace="my-namespace", # specifies which account to charge
|
||||||
)
|
)
|
||||||
|
df.to_pandas()
|
||||||
```
|
```
|
||||||
|
|
||||||
### Enterprise Features
|
### Enterprise Features
|
||||||
|
|||||||
Reference in New Issue
Block a user