From 000a45c0e90f474aa46b21d1e3b40fa4aa8a7c0e Mon Sep 17 00:00:00 2001 From: Timothy Kassis Date: Tue, 21 Oct 2025 12:50:07 -0700 Subject: [PATCH] Apply best practices --- .claude-plugin/marketplace.json | 2 +- .../alphafold-database/SKILL.md | 15 ++++++++++- scientific-databases/chembl-database/SKILL.md | 4 +-- .../clinicaltrials-database/SKILL.md | 4 +-- .../clinpgx-database/SKILL.md | 4 +-- .../clinvar-database/SKILL.md | 5 +++- scientific-databases/cosmic-database/SKILL.md | 2 +- scientific-databases/ena-database/SKILL.md | 15 ++++++++++- .../ensembl-database/SKILL.md | 13 ++++++++++ scientific-databases/gene-database/SKILL.md | 6 +++-- scientific-databases/geo-database/SKILL.md | 6 ++++- scientific-databases/gwas-database/SKILL.md | 2 +- scientific-databases/hmdb-database/SKILL.md | 6 ++++- scientific-databases/kegg-database/SKILL.md | 6 ++++- .../metabolomics-workbench-database/SKILL.md | 4 ++- .../opentargets-database/SKILL.md | 2 +- scientific-databases/pdb-database/SKILL.md | 11 +++++++- .../pubchem-database/SKILL.md | 13 +++++++++- scientific-databases/pubmed-database/SKILL.md | 2 +- .../reactome-database/SKILL.md | 13 +++++++++- scientific-databases/string-database/SKILL.md | 18 +++++++------ .../uniprot-database/SKILL.md | 18 ++++++++----- scientific-databases/uspto-database/SKILL.md | 4 +-- scientific-databases/zinc-database/SKILL.md | 4 +-- .../benchling-integration/SKILL.md | 2 +- .../dnanexus-integration/SKILL.md | 25 +++++++------------ .../labarchive-integration/SKILL.md | 15 ++++++++--- .../latchbio-integration/SKILL.md | 2 +- .../omero-integration/SKILL.md | 16 ++++++++++-- .../opentrons-integration/SKILL.md | 16 ++++++++++-- scientific-packages/anndata/SKILL.md | 2 +- scientific-packages/arboreto/SKILL.md | 2 +- scientific-packages/biomni/SKILL.md | 2 +- scientific-packages/biopython/SKILL.md | 2 +- scientific-packages/bioservices/SKILL.md | 4 +-- scientific-packages/cellxgene-census/SKILL.md | 2 +- scientific-packages/cobrapy/SKILL.md | 2 +- 
scientific-packages/dask/SKILL.md | 4 +-- scientific-packages/datamol/SKILL.md | 2 +- scientific-packages/deepchem/SKILL.md | 2 +- scientific-packages/deeptools/SKILL.md | 4 +-- scientific-packages/diffdock/SKILL.md | 4 +-- scientific-packages/etetoolkit/SKILL.md | 2 +- scientific-packages/flowio/SKILL.md | 4 +-- scientific-packages/gget/SKILL.md | 2 +- scientific-packages/matchms/SKILL.md | 2 +- scientific-packages/matplotlib/SKILL.md | 2 +- scientific-packages/medchem/SKILL.md | 12 +++++++-- scientific-packages/molfeat/SKILL.md | 11 ++------ scientific-packages/polars/SKILL.md | 2 +- scientific-packages/pydeseq2/SKILL.md | 10 +------- scientific-packages/pymatgen/SKILL.md | 10 +------- scientific-packages/pymc/SKILL.md | 2 +- scientific-packages/pymoo/SKILL.md | 4 +-- scientific-packages/pyopenms/SKILL.md | 12 +++++++-- scientific-packages/pysam/SKILL.md | 13 +++------- scientific-packages/pytdc/SKILL.md | 13 ++++++++-- .../pytorch-lightning/SKILL.md | 14 +++++++++-- scientific-packages/scanpy/SKILL.md | 4 +-- scientific-packages/scikit-bio/SKILL.md | 4 +-- scientific-packages/scikit-learn/SKILL.md | 4 +-- scientific-packages/seaborn/SKILL.md | 2 +- scientific-packages/statsmodels/SKILL.md | 4 +-- scientific-packages/torch_geometric/SKILL.md | 2 +- scientific-packages/torchdrug/SKILL.md | 15 +++-------- scientific-packages/transformers/SKILL.md | 4 +-- scientific-packages/umap-learn/SKILL.md | 10 +------- scientific-packages/zarr-python/SKILL.md | 10 +------- .../document-skills/docx/SKILL.md | 2 +- .../document-skills/pdf/SKILL.md | 2 +- .../document-skills/pptx/SKILL.md | 2 +- .../document-skills/xlsx/SKILL.md | 2 +- .../exploratory-data-analysis/SKILL.md | 2 +- .../hypothesis-generation/SKILL.md | 12 ++++++++- scientific-thinking/peer-review/SKILL.md | 13 +++++++++- .../scientific-brainstorming/SKILL.md | 13 +++++++++- .../scientific-critical-thinking/SKILL.md | 13 +++++++++- .../scientific-visualization/SKILL.md | 10 ++------ .../scientific-writing/SKILL.md | 
2 +- .../statistical-analysis/SKILL.md | 13 +++++++++- 80 files changed, 347 insertions(+), 200 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index d661788..8a7ebad 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -7,7 +7,7 @@ }, "metadata": { "description": "Claude scientific skills from K-Dense Inc", - "version": "1.39.0" + "version": "1.40.0" }, "plugins": [ { diff --git a/scientific-databases/alphafold-database/SKILL.md b/scientific-databases/alphafold-database/SKILL.md index 5b0885a..6f8c3a8 100644 --- a/scientific-databases/alphafold-database/SKILL.md +++ b/scientific-databases/alphafold-database/SKILL.md @@ -7,7 +7,20 @@ description: "Access AlphaFold's 200M+ AI-predicted protein structures. Retrieve ## Overview -This skill provides tools and guidance for working with the AlphaFold Protein Structure Database (AlphaFold DB), a public repository containing AI-predicted 3D protein structures for over 200 million proteins. Maintained by DeepMind and EMBL-EBI, AlphaFold DB provides structure predictions with confidence estimates for nearly complete proteomes across multiple organisms. Use this skill to search for predictions, retrieve structural data with confidence metrics, download coordinate files, access bulk datasets, and integrate AlphaFold predictions into computational workflows. +AlphaFold DB is a public repository of AI-predicted 3D protein structures for over 200 million proteins, maintained by DeepMind and EMBL-EBI. Access structure predictions with confidence metrics, download coordinate files, retrieve bulk datasets, and integrate predictions into computational workflows. 
+ +## When to Use This Skill + +This skill should be used when working with AI-predicted protein structures in scenarios such as: + +- Retrieving protein structure predictions by UniProt ID or protein name +- Downloading PDB/mmCIF coordinate files for structural analysis +- Analyzing prediction confidence metrics (pLDDT, PAE) to assess reliability +- Accessing bulk proteome datasets via Google Cloud Platform +- Comparing predicted structures with experimental data +- Performing structure-based drug discovery or protein engineering +- Building structural models for proteins lacking experimental structures +- Integrating AlphaFold predictions into computational pipelines ## Core Capabilities diff --git a/scientific-databases/chembl-database/SKILL.md b/scientific-databases/chembl-database/SKILL.md index 070007e..fb61f07 100644 --- a/scientific-databases/chembl-database/SKILL.md +++ b/scientific-databases/chembl-database/SKILL.md @@ -7,11 +7,11 @@ description: "Query ChEMBL's bioactive molecules and drug discovery data. Search ## Overview -Facilitate access to and querying of the ChEMBL database, a manually curated repository of bioactive molecules with drug-like properties maintained by the European Bioinformatics Institute (EBI). ChEMBL contains over 2 million compounds, 19 million bioactivity measurements, information on 13,000+ drug targets, and data on approved drugs and clinical candidates. +ChEMBL is a manually curated database of bioactive molecules maintained by the European Bioinformatics Institute (EBI), containing over 2 million compounds, 19 million bioactivity measurements, 13,000+ drug targets, and data on approved drugs and clinical candidates. Access and query this data programmatically using the ChEMBL Python client for drug discovery and medicinal chemistry research. 
## When to Use This Skill -This skill should be used when queries involve: +This skill should be used when: - **Compound searches**: Finding molecules by name, structure, or properties - **Target information**: Retrieving data about proteins, enzymes, or biological targets diff --git a/scientific-databases/clinicaltrials-database/SKILL.md b/scientific-databases/clinicaltrials-database/SKILL.md index 90b376c..54723c6 100644 --- a/scientific-databases/clinicaltrials-database/SKILL.md +++ b/scientific-databases/clinicaltrials-database/SKILL.md @@ -7,9 +7,7 @@ description: "Query ClinicalTrials.gov via API v2. Search trials by condition, d ## Overview -The ClinicalTrials.gov database is a comprehensive registry of clinical studies conducted around the world, maintained by the U.S. National Library of Medicine. This skill provides access to the ClinicalTrials.gov API v2, enabling programmatic queries to search for trials, retrieve detailed study information, filter by various criteria, and export data for analysis. - -The API is public (no authentication required) with a rate limit of approximately 50 requests per minute per IP address. It supports both JSON and CSV response formats and uses modern standards including ISO 8601 dates and CommonMark Markdown formatting. +ClinicalTrials.gov is a comprehensive registry of clinical studies conducted worldwide, maintained by the U.S. National Library of Medicine. Access API v2 to search for trials, retrieve detailed study information, filter by various criteria, and export data for analysis. The API is public (no authentication required) with rate limits of ~50 requests per minute, supporting JSON and CSV formats. 
## When to Use This Skill diff --git a/scientific-databases/clinpgx-database/SKILL.md b/scientific-databases/clinpgx-database/SKILL.md index 585b052..b50f074 100644 --- a/scientific-databases/clinpgx-database/SKILL.md +++ b/scientific-databases/clinpgx-database/SKILL.md @@ -7,11 +7,11 @@ description: "Access ClinPGx pharmacogenomics data (successor to PharmGKB). Quer ## Overview -Facilitate access to and querying of ClinPGx (Clinical Pharmacogenomics Database), a comprehensive resource for clinical pharmacogenomics information. ClinPGx is the successor to PharmGKB (launched officially in July 2025) and consolidates data from PharmGKB, CPIC (Clinical Pharmacogenetics Implementation Consortium), and PharmCAT (Pharmacogenomics Clinical Annotation Tool). The database provides curated information on how human genetic variation affects medication response, including gene-drug pairs, clinical guidelines, allele functions, and drug labels. Managed at Stanford University as a ClinGen (Clinical Genome Resource) affiliate grant. +ClinPGx (Clinical Pharmacogenomics Database) is a comprehensive resource for clinical pharmacogenomics information, successor to PharmGKB. It consolidates data from PharmGKB, CPIC, and PharmCAT, providing curated information on how genetic variation affects medication response. Access gene-drug pairs, clinical guidelines, allele functions, and drug labels for precision medicine applications. 
## When to Use This Skill -This skill should be used when queries involve: +This skill should be used when: - **Gene-drug interactions**: Querying how genetic variants affect drug metabolism, efficacy, or toxicity - **CPIC guidelines**: Accessing evidence-based clinical practice guidelines for pharmacogenetics diff --git a/scientific-databases/clinvar-database/SKILL.md b/scientific-databases/clinvar-database/SKILL.md index 2e0c729..79cf40d 100644 --- a/scientific-databases/clinvar-database/SKILL.md +++ b/scientific-databases/clinvar-database/SKILL.md @@ -9,7 +9,10 @@ description: "Query NCBI ClinVar for variant clinical significance. Search by ge ClinVar is NCBI's freely accessible archive of reports on relationships between human genetic variants and phenotypes, with supporting evidence. The database aggregates information about genomic variation and its relationship to human health, providing standardized variant classifications used in clinical genetics and research. -Use this skill for tasks involving: +## When to Use This Skill + +This skill should be used when: + - Searching for variants by gene, condition, or clinical significance - Interpreting clinical significance classifications (pathogenic, benign, VUS) - Accessing ClinVar data programmatically via E-utilities API diff --git a/scientific-databases/cosmic-database/SKILL.md b/scientific-databases/cosmic-database/SKILL.md index 1b31915..07355f8 100644 --- a/scientific-databases/cosmic-database/SKILL.md +++ b/scientific-databases/cosmic-database/SKILL.md @@ -7,7 +7,7 @@ description: "Access COSMIC cancer mutation database. Query somatic mutations, C ## Overview -COSMIC (Catalogue of Somatic Mutations in Cancer) is the world's largest and most comprehensive database for exploring somatic mutations in human cancer. 
This skill enables programmatic access to COSMIC's extensive collection of cancer genomics data, including millions of mutations across thousands of cancer types, curated gene lists, mutational signatures, and clinical annotations. +COSMIC (Catalogue of Somatic Mutations in Cancer) is the world's largest and most comprehensive database for exploring somatic mutations in human cancer. Programmatically access COSMIC's extensive collection of cancer genomics data, including millions of mutations across thousands of cancer types, curated gene lists, mutational signatures, and clinical annotations. ## When to Use This Skill diff --git a/scientific-databases/ena-database/SKILL.md b/scientific-databases/ena-database/SKILL.md index 655c3b1..91b5928 100644 --- a/scientific-databases/ena-database/SKILL.md +++ b/scientific-databases/ena-database/SKILL.md @@ -7,7 +7,20 @@ description: "Access European Nucleotide Archive via API/FTP. Retrieve DNA/RNA s ## Overview -This skill provides tools and guidance for working with the European Nucleotide Archive (ENA), a comprehensive public repository for nucleotide sequence data and associated metadata. ENA serves as a global platform for managing, sharing, and accessing DNA/RNA sequences, raw reads, genome assemblies, and functional annotations. +The European Nucleotide Archive (ENA) is a comprehensive public repository for nucleotide sequence data and associated metadata. Access and query DNA/RNA sequences, raw reads, genome assemblies, and functional annotations through REST APIs and FTP for genomics and bioinformatics pipelines. 
+ +## When to Use This Skill + +This skill should be used when: + +- Retrieving nucleotide sequences or raw sequencing reads by accession +- Searching for samples, studies, or assemblies by metadata criteria +- Downloading FASTQ files or genome assemblies for analysis +- Querying taxonomic information for organisms +- Accessing sequence annotations and functional data +- Integrating ENA data into bioinformatics pipelines +- Performing cross-reference searches to related databases +- Bulk downloading datasets via FTP or Aspera ## Core Capabilities diff --git a/scientific-databases/ensembl-database/SKILL.md b/scientific-databases/ensembl-database/SKILL.md index c29a9eb..9038938 100644 --- a/scientific-databases/ensembl-database/SKILL.md +++ b/scientific-databases/ensembl-database/SKILL.md @@ -9,6 +9,19 @@ description: "Query Ensembl genome database REST API for 250+ species. Gene look Access and query the Ensembl genome database, a comprehensive resource for vertebrate genomic data maintained by EMBL-EBI. The database provides gene annotations, sequences, variants, regulatory information, and comparative genomics data for over 250 species. Current release is 115 (September 2025). +## When to Use This Skill + +This skill should be used when: + +- Querying gene information by symbol or Ensembl ID +- Retrieving DNA, transcript, or protein sequences +- Analyzing genetic variants using the Variant Effect Predictor (VEP) +- Finding orthologs and paralogs across species +- Accessing regulatory features and genomic annotations +- Converting coordinates between genome assemblies (e.g., GRCh37 to GRCh38) +- Performing comparative genomics analyses +- Integrating Ensembl data into genomic research pipelines + ## Core Capabilities ### 1. 
Gene Information Retrieval diff --git a/scientific-databases/gene-database/SKILL.md b/scientific-databases/gene-database/SKILL.md index 67e2013..bade9be 100644 --- a/scientific-databases/gene-database/SKILL.md +++ b/scientific-databases/gene-database/SKILL.md @@ -7,9 +7,11 @@ description: "Query NCBI Gene via E-utilities/Datasets API. Search by symbol/ID, ## Overview -This skill enables programmatic access to NCBI Gene, a comprehensive database integrating gene information from diverse species. NCBI Gene provides nomenclature, reference sequences (RefSeqs), chromosomal maps, biological pathways, genetic variations, phenotypes, and cross-references to global genomic resources. +NCBI Gene is a comprehensive database integrating gene information from diverse species. It provides nomenclature, reference sequences (RefSeqs), chromosomal maps, biological pathways, genetic variations, phenotypes, and cross-references to global genomic resources. -Use this skill when working with gene data including searching by gene symbol or ID, retrieving gene sequences and metadata, analyzing gene functions and pathways, or performing batch gene lookups. +## When to Use This Skill + +This skill should be used when working with gene data including searching by gene symbol or ID, retrieving gene sequences and metadata, analyzing gene functions and pathways, or performing batch gene lookups. ## Quick Start diff --git a/scientific-databases/geo-database/SKILL.md b/scientific-databases/geo-database/SKILL.md index 50884d3..e99ca94 100644 --- a/scientific-databases/geo-database/SKILL.md +++ b/scientific-databases/geo-database/SKILL.md @@ -7,7 +7,11 @@ description: "Access NCBI GEO for gene expression/genomics data. Search/download ## Overview -This skill provides tools and guidance for working with the Gene Expression Omnibus (GEO), NCBI's public repository for high-throughput gene expression and functional genomics data. 
GEO contains over 264,000 studies with more than 8 million samples from both array-based and sequence-based experiments. Use this skill to search for gene expression datasets, retrieve experimental data, download raw and processed files, query expression profiles, and integrate GEO data into computational analysis workflows. +The Gene Expression Omnibus (GEO) is NCBI's public repository for high-throughput gene expression and functional genomics data. GEO contains over 264,000 studies with more than 8 million samples from both array-based and sequence-based experiments. + +## When to Use This Skill + +This skill should be used when searching for gene expression datasets, retrieving experimental data, downloading raw and processed files, querying expression profiles, or integrating GEO data into computational analysis workflows. ## Core Capabilities diff --git a/scientific-databases/gwas-database/SKILL.md b/scientific-databases/gwas-database/SKILL.md index 2b1617d..7dc8038 100644 --- a/scientific-databases/gwas-database/SKILL.md +++ b/scientific-databases/gwas-database/SKILL.md @@ -7,7 +7,7 @@ description: "Query NHGRI-EBI GWAS Catalog for SNP-trait associations. Search va ## Overview -Facilitate access to and querying of the GWAS Catalog, a comprehensive repository of published genome-wide association studies maintained by the National Human Genome Research Institute (NHGRI) and the European Bioinformatics Institute (EBI). The catalog contains curated SNP-trait associations from thousands of GWAS publications, including genetic variants, associated traits and diseases, p-values, effect sizes, and full summary statistics for many studies. +The GWAS Catalog is a comprehensive repository of published genome-wide association studies maintained by the National Human Genome Research Institute (NHGRI) and the European Bioinformatics Institute (EBI). 
The catalog contains curated SNP-trait associations from thousands of GWAS publications, including genetic variants, associated traits and diseases, p-values, effect sizes, and full summary statistics for many studies. ## When to Use This Skill diff --git a/scientific-databases/hmdb-database/SKILL.md b/scientific-databases/hmdb-database/SKILL.md index 25f4603..0582de3 100644 --- a/scientific-databases/hmdb-database/SKILL.md +++ b/scientific-databases/hmdb-database/SKILL.md @@ -7,7 +7,11 @@ description: "Access Human Metabolome Database (220K+ metabolites). Search by na ## Overview -The Human Metabolome Database (HMDB) is a comprehensive, freely available resource containing detailed information about small molecule metabolites found in the human body. It supports metabolomics research, clinical chemistry, biomarker discovery, and metabolite identification tasks. +The Human Metabolome Database (HMDB) is a comprehensive, freely available resource containing detailed information about small molecule metabolites found in the human body. + +## When to Use This Skill + +This skill should be used when performing metabolomics research, clinical chemistry, biomarker discovery, or metabolite identification tasks. ## Database Contents diff --git a/scientific-databases/kegg-database/SKILL.md b/scientific-databases/kegg-database/SKILL.md index 5e05e6d..e301232 100644 --- a/scientific-databases/kegg-database/SKILL.md +++ b/scientific-databases/kegg-database/SKILL.md @@ -7,10 +7,14 @@ description: "Query KEGG REST API for pathway analysis, gene-pathway mapping, me ## Overview -KEGG (Kyoto Encyclopedia of Genes and Genomes) is a comprehensive bioinformatics resource for biological pathway analysis and molecular interaction networks. This skill enables interaction with KEGG's REST API to query pathways, genes, compounds, enzymes, diseases, and drugs across multiple organisms. 
+KEGG (Kyoto Encyclopedia of Genes and Genomes) is a comprehensive bioinformatics resource for biological pathway analysis and molecular interaction networks. **Important**: KEGG API is made available only for academic use by academic users. +## When to Use This Skill + +This skill should be used when querying pathways, genes, compounds, enzymes, diseases, and drugs across multiple organisms using KEGG's REST API. + ## Quick Start The skill provides: diff --git a/scientific-databases/metabolomics-workbench-database/SKILL.md b/scientific-databases/metabolomics-workbench-database/SKILL.md index a0f2109..730a793 100644 --- a/scientific-databases/metabolomics-workbench-database/SKILL.md +++ b/scientific-databases/metabolomics-workbench-database/SKILL.md @@ -9,7 +9,9 @@ description: "Access NIH Metabolomics Workbench via REST API (4,200+ studies). Q The Metabolomics Workbench is a comprehensive NIH Common Fund-sponsored platform hosted at UCSD that serves as the primary repository for metabolomics research data. It provides programmatic access to over 4,200 processed studies (3,790+ publicly available), standardized metabolite nomenclature through RefMet, and powerful search capabilities across multiple analytical platforms (GC-MS, LC-MS, NMR). -This skill enables efficient interaction with the Metabolomics Workbench REST API to query metabolite structures, access study data, standardize nomenclature, perform mass spectrometry searches, and retrieve gene/protein-metabolite associations. +## When to Use This Skill + +This skill should be used when querying metabolite structures, accessing study data, standardizing nomenclature, performing mass spectrometry searches, or retrieving gene/protein-metabolite associations through the Metabolomics Workbench REST API. 
## Core Capabilities diff --git a/scientific-databases/opentargets-database/SKILL.md b/scientific-databases/opentargets-database/SKILL.md index c141aa5..b627633 100644 --- a/scientific-databases/opentargets-database/SKILL.md +++ b/scientific-databases/opentargets-database/SKILL.md @@ -7,7 +7,7 @@ description: "Query Open Targets Platform for target-disease associations, drug ## Overview -The Open Targets Platform is a comprehensive resource that supports systematic identification and prioritization of potential therapeutic drug targets. It integrates publicly available datasets including human genetics, omics, literature, and chemical data to build and score target-disease associations. +The Open Targets Platform is a comprehensive resource for systematic identification and prioritization of potential therapeutic drug targets. It integrates publicly available datasets including human genetics, omics, literature, and chemical data to build and score target-disease associations. **Key capabilities:** - Query target (gene) annotations including tractability, safety, expression diff --git a/scientific-databases/pdb-database/SKILL.md b/scientific-databases/pdb-database/SKILL.md index 2f92129..df11f4b 100644 --- a/scientific-databases/pdb-database/SKILL.md +++ b/scientific-databases/pdb-database/SKILL.md @@ -7,7 +7,16 @@ description: "Access RCSB PDB for 3D protein/nucleic acid structures. Search by ## Overview -This skill provides tools and guidance for working with the RCSB Protein Data Bank (PDB), the worldwide repository for 3D structural data of biological macromolecules. The PDB contains over 200,000 experimentally determined structures of proteins, nucleic acids, and complex assemblies, along with computed structure models. Use this skill to search for structures, retrieve structural data, perform sequence and structure similarity searches, and integrate PDB data into computational workflows. 
+RCSB PDB is the worldwide repository for 3D structural data of biological macromolecules. Search for structures, retrieve coordinates and metadata, and perform sequence and structure similarity searches across 200,000+ experimentally determined structures and computed models. + +## When to Use This Skill + +This skill should be used when: +- Searching for protein or nucleic acid 3D structures by text, sequence, or structural similarity +- Downloading coordinate files in PDB, mmCIF, or BinaryCIF formats +- Retrieving structural metadata, experimental methods, or quality metrics +- Performing batch operations across multiple structures +- Integrating PDB data into computational workflows for drug discovery, protein engineering, or structural biology research ## Core Capabilities diff --git a/scientific-databases/pubchem-database/SKILL.md b/scientific-databases/pubchem-database/SKILL.md index d7ec50d..8551065 100644 --- a/scientific-databases/pubchem-database/SKILL.md +++ b/scientific-databases/pubchem-database/SKILL.md @@ -7,7 +7,18 @@ description: "Query PubChem via PUG-REST API/PubChemPy (110M+ compounds). Search ## Overview -PubChem is the world's largest freely available chemical database maintained by the National Center for Biotechnology Information (NCBI). It contains over 110 million unique chemical structures and over 270 million bioactivities from more than 770 data sources. This skill provides guidance for programmatically accessing PubChem data using the PUG-REST API and PubChemPy Python library. +PubChem is the world's largest freely available chemical database with 110M+ compounds and 270M+ bioactivities. Query chemical structures by name, CID, or SMILES; retrieve molecular properties; perform similarity and substructure searches; and access bioactivity data using the PUG-REST API and PubChemPy. 
+ +## When to Use This Skill + +This skill should be used when: +- Searching for chemical compounds by name, structure (SMILES/InChI), or molecular formula +- Retrieving molecular properties (MW, LogP, TPSA, hydrogen bonding descriptors) +- Performing similarity searches to find structurally related compounds +- Conducting substructure searches for specific chemical motifs +- Accessing bioactivity data from screening assays +- Converting between chemical identifier formats (CID, SMILES, InChI) +- Batch processing multiple compounds for drug-likeness screening or property analysis ## Core Capabilities diff --git a/scientific-databases/pubmed-database/SKILL.md b/scientific-databases/pubmed-database/SKILL.md index 159dcc0..a0c4de6 100644 --- a/scientific-databases/pubmed-database/SKILL.md +++ b/scientific-databases/pubmed-database/SKILL.md @@ -7,7 +7,7 @@ description: "Search PubMed biomedical literature. Advanced queries with Boolean ## Overview -PubMed is the U.S. National Library of Medicine's comprehensive database providing free access to MEDLINE and life sciences literature. This skill provides expertise in searching PubMed effectively, constructing advanced queries, and accessing data programmatically through the E-utilities API. +PubMed is the U.S. National Library of Medicine's comprehensive database providing free access to MEDLINE and life sciences literature. Construct advanced queries with Boolean operators, MeSH terms, and field tags, and access data programmatically via the E-utilities API for systematic reviews and literature analysis. 
## When to Use This Skill diff --git a/scientific-databases/reactome-database/reactome-database/SKILL.md b/scientific-databases/reactome-database/reactome-database/SKILL.md index dc81bee..0d08000 100644 --- a/scientific-databases/reactome-database/reactome-database/SKILL.md +++ b/scientific-databases/reactome-database/reactome-database/SKILL.md @@ -7,7 +7,18 @@ description: "Query Reactome REST API for pathway analysis, enrichment, gene-pat ## Overview -This skill enables interaction with Reactome, a free, open-source, curated and peer-reviewed pathway database. Reactome provides comprehensive biological pathway data for research, genome analysis, modeling, and systems biology. The database contains thousands of human pathways, reactions, proteins, small molecules, and drugs, all supported by extensive literature references. +Reactome is a free, open-source, curated pathway database with 2,825+ human pathways. Query biological pathways, perform overrepresentation and expression analysis, map genes to pathways, and explore molecular interactions via the REST API and Python client for systems biology research. 
+ +## When to Use This Skill + +This skill should be used when: +- Performing pathway enrichment analysis on gene or protein lists +- Analyzing gene expression data to identify relevant biological pathways +- Querying specific pathway information, reactions, or molecular interactions +- Mapping genes or proteins to biological pathways and processes +- Exploring disease-related pathways and mechanisms +- Visualizing analysis results in the Reactome Pathway Browser +- Conducting comparative pathway analysis across species ## Core Capabilities diff --git a/scientific-databases/string-database/SKILL.md b/scientific-databases/string-database/SKILL.md index 9fdbc72..e9347a8 100644 --- a/scientific-databases/string-database/SKILL.md +++ b/scientific-databases/string-database/SKILL.md @@ -7,15 +7,19 @@ description: "Query STRING API for protein-protein interactions (59M proteins, 2 ## Overview -STRING (Search Tool for the Retrieval of Interacting Genes/Proteins) is a comprehensive database of known and predicted protein-protein interactions. This skill enables interaction with STRING's REST API to query protein networks, analyze functional enrichments, and discover interaction partners across 5000+ organisms. +STRING is a comprehensive database of known and predicted protein-protein interactions covering 59M proteins and 20B+ interactions across 5000+ organisms. Query interaction networks, perform functional enrichment, and discover interaction partners via the REST API for systems biology and pathway analysis. -**Database Statistics:** -- Coverage: 5000+ genomes -- Proteins: ~59.3 million -- Interactions: 20+ billion -- Evidence types: Experimental data, computational prediction, text-mining, pathway databases +## When to Use This Skill -**Data Sources:** Integrates over 40 sources including experimental repositories, pathway databases, automated text-mining, and computational predictions. 
+This skill should be used when: +- Retrieving protein-protein interaction networks for single or multiple proteins +- Performing functional enrichment analysis (GO, KEGG, Pfam) on protein lists +- Discovering interaction partners and expanding protein networks +- Testing if proteins form significantly enriched functional modules +- Generating network visualizations with evidence-based coloring +- Analyzing homology and protein family relationships +- Conducting cross-species protein interaction comparisons +- Identifying hub proteins and network connectivity patterns ## Quick Start diff --git a/scientific-databases/uniprot-database/SKILL.md b/scientific-databases/uniprot-database/SKILL.md index d13f670..86e42bb 100644 --- a/scientific-databases/uniprot-database/SKILL.md +++ b/scientific-databases/uniprot-database/SKILL.md @@ -7,13 +7,19 @@ description: "Query UniProt protein database REST API. Search proteins by name/g ## Overview -Interact with UniProt (Universal Protein Resource), the world's leading comprehensive and freely accessible resource for protein sequence and functional information. This skill enables programmatic access to UniProtKB (Swiss-Prot and TrEMBL), UniRef, UniParc, and other UniProt databases through the REST API. +UniProt is the world's leading comprehensive protein sequence and functional information resource. Search proteins by name, gene, or accession; retrieve sequences in FASTA format; perform ID mapping across databases; and access Swiss-Prot/TrEMBL annotations via the REST API for protein analysis. 
-**Key databases:** -- **UniProtKB/Swiss-Prot**: Manually annotated, reviewed, high-quality protein entries -- **UniProtKB/TrEMBL**: Automatically annotated, unreviewed protein entries -- **UniRef**: Clustered sets of sequences for similarity searches -- **UniParc**: Comprehensive archive of all protein sequences +## When to Use This Skill + +This skill should be used when: +- Searching for protein entries by name, gene symbol, accession, or organism +- Retrieving protein sequences in FASTA or other formats +- Mapping identifiers between UniProt and external databases (Ensembl, RefSeq, PDB, etc.) +- Accessing protein annotations including GO terms, domains, and functional descriptions +- Batch retrieving multiple protein entries efficiently +- Querying reviewed (Swiss-Prot) vs. unreviewed (TrEMBL) protein data +- Streaming large protein datasets +- Building custom queries with field-specific search syntax ## Core Capabilities diff --git a/scientific-databases/uspto-database/SKILL.md b/scientific-databases/uspto-database/SKILL.md index b1420c1..259a251 100644 --- a/scientific-databases/uspto-database/SKILL.md +++ b/scientific-databases/uspto-database/SKILL.md @@ -7,11 +7,11 @@ description: "Access USPTO APIs for patent/trademark searches, examination histo ## Overview -Access comprehensive United States Patent and Trademark Office data through multiple specialized APIs. This skill enables patent and trademark searching, retrieval of examination history, analysis of citations and office actions, tracking of assignments and ownership, and access to litigation records. +The USPTO provides specialized APIs for patent and trademark data. Search patents by keywords/inventors/assignees, retrieve examination history via PEDS, track assignments, analyze citations and office actions, and access TSDR for trademarks to support IP analysis and prior art searches.
## When to Use This Skill -Use this skill for tasks involving: +This skill should be used when: - **Patent Search**: Finding patents by keywords, inventors, assignees, classifications, or dates - **Patent Details**: Retrieving full patent data including claims, abstracts, citations diff --git a/scientific-databases/zinc-database/SKILL.md b/scientific-databases/zinc-database/SKILL.md index 6ce1d62..6f85f9d 100644 --- a/scientific-databases/zinc-database/SKILL.md +++ b/scientific-databases/zinc-database/SKILL.md @@ -7,11 +7,11 @@ description: "Access ZINC (230M+ purchasable compounds). Search by ZINC ID/SMILE ## Overview -Facilitate access to and querying of the ZINC database, a freely accessible repository of commercially-available compounds maintained by the Irwin and Shoichet Laboratories at UCSF. ZINC22 contains over 230 million purchasable compounds in ready-to-dock 3D formats, with an additional 750 million compounds searchable for analogs, making it one of the largest resources for virtual screening and ligand discovery. +ZINC is a freely accessible repository of 230M+ purchasable compounds maintained by UCSF. Search by ZINC ID or SMILES, perform similarity searches, download 3D-ready structures for docking, discover analogs for virtual screening and drug discovery. ## When to Use This Skill -This skill should be used when queries involve: +This skill should be used when: - **Virtual screening**: Finding compounds for molecular docking studies - **Lead discovery**: Identifying commercially-available compounds for drug development diff --git a/scientific-integrations/benchling-integration/SKILL.md b/scientific-integrations/benchling-integration/SKILL.md index cad9ed4..988f18b 100644 --- a/scientific-integrations/benchling-integration/SKILL.md +++ b/scientific-integrations/benchling-integration/SKILL.md @@ -7,7 +7,7 @@ description: "Benchling R&D platform integration. 
Access registry (DNA, proteins ## Overview -Integrate with Benchling's cloud platform for life sciences R&D, enabling programmatic access to registry entities, inventory management, electronic lab notebooks, and workflow automation. This skill provides comprehensive guidance for using both the Python SDK and REST API to interact with Benchling data. +Benchling is a cloud platform for life sciences R&D. Access registry entities (DNA, proteins), inventory, electronic lab notebooks, and workflows programmatically via Python SDK and REST API. ## When to Use This Skill diff --git a/scientific-integrations/dnanexus-integration/SKILL.md b/scientific-integrations/dnanexus-integration/SKILL.md index 003ad74..b3a7e85 100644 --- a/scientific-integrations/dnanexus-integration/SKILL.md +++ b/scientific-integrations/dnanexus-integration/SKILL.md @@ -7,25 +7,18 @@ description: "DNAnexus cloud genomics platform. Build apps/applets, manage data ## Overview -DNAnexus is a cloud-based platform for biomedical data analysis, particularly genomics. This skill provides comprehensive guidance for interacting with DNAnexus through: - -- Building and deploying apps and applets (Python/Bash) -- Managing data objects (files, records, databases) -- Running analyses and workflows -- Using the dxpy Python SDK -- Configuring app metadata and dependencies +DNAnexus is a cloud platform for biomedical data analysis and genomics. Build and deploy apps/applets, manage data objects, run workflows, and use the dxpy Python SDK for genomics pipeline development and execution. 
## When to Use This Skill -This skill should be used when working with: - -- **App Development**: Creating, building, or modifying DNAnexus apps/applets -- **Data Management**: Uploading, downloading, searching, or organizing files and records -- **Job Execution**: Running analyses, monitoring jobs, creating workflows -- **Python SDK**: Writing scripts using dxpy to interact with the platform -- **Configuration**: Setting up dxapp.json, managing dependencies, using Docker -- **Genomics Workflows**: Processing FASTQ, BAM, VCF, or other bioinformatics files -- **Platform Operations**: Managing projects, permissions, or platform resources +This skill should be used when: +- Creating, building, or modifying DNAnexus apps/applets +- Uploading, downloading, searching, or organizing files and records +- Running analyses, monitoring jobs, creating workflows +- Writing scripts using dxpy to interact with the platform +- Setting up dxapp.json, managing dependencies, using Docker +- Processing FASTQ, BAM, VCF, or other bioinformatics files +- Managing projects, permissions, or platform resources ## Core Capabilities diff --git a/scientific-integrations/labarchive-integration/SKILL.md b/scientific-integrations/labarchive-integration/SKILL.md index bf7afa0..fd98950 100644 --- a/scientific-integrations/labarchive-integration/SKILL.md +++ b/scientific-integrations/labarchive-integration/SKILL.md @@ -1,15 +1,24 @@ --- name: labarchive-integration -description: "LabArchives ELN API integration. Access notebooks, manage entries/attachments, backup notebooks, integrate with Protocols.io/Jupyter/REDCap, for programmatic ELN workflows." +description: "Electronic lab notebook API integration. Access notebooks, manage entries/attachments, backup notebooks, integrate with Protocols.io/Jupyter/REDCap, for programmatic ELN workflows." 
--- # LabArchives Integration ## Overview -Provide comprehensive tools and workflows for interacting with the LabArchives Electronic Lab Notebook (ELN) REST API. LabArchives is a widely-used electronic lab notebook platform for research documentation, data management, and collaboration in academic and industrial laboratories. +LabArchives is an electronic lab notebook platform for research documentation and data management. Access notebooks, manage entries and attachments, generate reports, and integrate with third-party tools programmatically via REST API. -This skill enables programmatic access to LabArchives notebooks, including user authentication, notebook operations, entry management, report generation, and third-party integrations. +## When to Use This Skill + +This skill should be used when: +- Working with LabArchives REST API for notebook automation +- Backing up notebooks programmatically +- Creating or managing notebook entries and attachments +- Generating site reports and analytics +- Integrating LabArchives with third-party tools (Protocols.io, Jupyter, REDCap) +- Automating data upload to electronic lab notebooks +- Managing user access and permissions programmatically ## Core Capabilities diff --git a/scientific-integrations/latchbio-integration/SKILL.md b/scientific-integrations/latchbio-integration/SKILL.md index d509eda..27173d2 100644 --- a/scientific-integrations/latchbio-integration/SKILL.md +++ b/scientific-integrations/latchbio-integration/SKILL.md @@ -7,7 +7,7 @@ description: "Latch platform for bioinformatics workflows. Build pipelines with ## Overview -The Latch SDK is a Python framework for building and deploying bioinformatics workflows as serverless pipelines with automatic UI generation. Built on Flyte, Latch provides containerization, type safety, scalable cloud infrastructure, and a comprehensive data management system. 
This skill provides guidance for creating workflows, managing data, configuring resources, and using verified pre-built pipelines. +Latch is a Python framework, built on Flyte, for building and deploying bioinformatics workflows as serverless pipelines. Create workflows with @workflow/@task decorators, manage cloud data with LatchFile/LatchDir, configure resources, and integrate Nextflow/Snakemake pipelines. ## Core Capabilities diff --git a/scientific-integrations/omero-integration/SKILL.md b/scientific-integrations/omero-integration/SKILL.md index b77d6e0..7a4ae60 100644 --- a/scientific-integrations/omero-integration/SKILL.md +++ b/scientific-integrations/omero-integration/SKILL.md @@ -1,13 +1,25 @@ --- name: omero-integration -description: "OMERO microscopy data management. Access images via Python, retrieve datasets, analyze pixels, manage ROIs/annotations, batch processing, for high-content screening and microscopy workflows." +description: "Microscopy data management platform. Access images via Python, retrieve datasets, analyze pixels, manage ROIs/annotations, batch processing, for high-content screening and microscopy workflows." --- # OMERO Integration ## Overview -OMERO is an open-source client-server platform for managing, visualizing, and analyzing microscopy images and associated metadata. This skill provides comprehensive guidance for using OMERO's Python API (omero-py) to programmatically interact with OMERO servers for data retrieval, analysis, and management. +OMERO is an open-source platform for managing, visualizing, and analyzing microscopy images and metadata. Access images via the Python API, retrieve datasets, analyze pixels, and manage ROIs and annotations for high-content screening and microscopy workflows.
+ +## When to Use This Skill + +This skill should be used when: +- Working with OMERO Python API (omero-py) to access microscopy data +- Retrieving images, datasets, projects, or screening data programmatically +- Analyzing pixel data and creating derived images +- Creating or managing ROIs (regions of interest) on microscopy images +- Adding annotations, tags, or metadata to OMERO objects +- Storing measurement results in OMERO tables +- Creating server-side scripts for batch processing +- Performing high-content screening analysis ## Core Capabilities diff --git a/scientific-integrations/opentrons-integration/SKILL.md b/scientific-integrations/opentrons-integration/SKILL.md index 82c41b5..903e6f7 100644 --- a/scientific-integrations/opentrons-integration/SKILL.md +++ b/scientific-integrations/opentrons-integration/SKILL.md @@ -1,13 +1,25 @@ --- name: opentrons-integration -description: "Opentrons lab automation. Write Protocol API v2 protocols for Flex/OT-2 robots, liquid handling, hardware modules (heater-shaker, thermocycler), labware management, for automated pipetting workflows." +description: "Lab automation platform for Flex/OT-2 robots. Write Protocol API v2 protocols, liquid handling, hardware modules (heater-shaker, thermocycler), labware management, for automated pipetting workflows." --- # Opentrons Integration ## Overview -Opentrons provides a Python-based automation platform for laboratory protocols using Flex and OT-2 robots. This skill enables creation and management of Python Protocol API v2 protocols for automated liquid handling, hardware module control, and complex laboratory workflows. +Opentrons is a Python-based lab automation platform for Flex and OT-2 robots. Write Protocol API v2 protocols for liquid handling, control hardware modules (heater-shaker, thermocycler), manage labware, for automated pipetting workflows. 
+ +## When to Use This Skill + +This skill should be used when: +- Writing Opentrons Protocol API v2 protocols in Python +- Automating liquid handling workflows on Flex or OT-2 robots +- Controlling hardware modules (temperature, magnetic, heater-shaker, thermocycler) +- Setting up labware configurations and deck layouts +- Implementing complex pipetting operations (serial dilutions, plate replication, PCR setup) +- Managing tip usage and optimizing protocol efficiency +- Working with multi-channel pipettes for 96-well plate operations +- Simulating and testing protocols before robot execution ## Core Capabilities diff --git a/scientific-packages/anndata/SKILL.md b/scientific-packages/anndata/SKILL.md index 3aa1ff3..ebe32fe 100644 --- a/scientific-packages/anndata/SKILL.md +++ b/scientific-packages/anndata/SKILL.md @@ -7,7 +7,7 @@ description: "Manipulate AnnData objects for single-cell genomics. Load/save .h5 ## Overview -AnnData (Annotated Data) is Python's standard for storing and manipulating annotated data matrices, particularly in single-cell genomics. This skill provides comprehensive guidance for working with AnnData objects, including data creation, manipulation, file I/O, concatenation, and best practices for memory-efficient workflows. +AnnData (Annotated Data) is Python's standard for storing and manipulating annotated data matrices, particularly in single-cell genomics. Work with AnnData objects for data creation, manipulation, file I/O, concatenation, and memory-efficient workflows. 
## Core Capabilities diff --git a/scientific-packages/arboreto/SKILL.md b/scientific-packages/arboreto/SKILL.md index 40501c2..ff3c9a5 100644 --- a/scientific-packages/arboreto/SKILL.md +++ b/scientific-packages/arboreto/SKILL.md @@ -11,7 +11,7 @@ Arboreto is a Python library for inferring gene regulatory networks (GRNs) from ## When to Use This Skill -Apply this skill when: +This skill should be used when: - Inferring regulatory relationships between genes from expression data - Analyzing single-cell or bulk RNA-seq data to identify transcription factor targets - Building the GRN inference component of a pySCENIC pipeline diff --git a/scientific-packages/biomni/SKILL.md b/scientific-packages/biomni/SKILL.md index 1e4227f..c656921 100644 --- a/scientific-packages/biomni/SKILL.md +++ b/scientific-packages/biomni/SKILL.md @@ -7,7 +7,7 @@ description: "AI agent for autonomous biomedical task execution. CRISPR design, ## Overview -Biomni is a general-purpose biomedical AI agent that autonomously executes research tasks across diverse biomedical subfields. It combines large language model reasoning with retrieval-augmented planning and code-based execution to enhance scientific productivity and hypothesis generation. The system operates with an ~11GB biomedical knowledge base covering molecular, genomic, and clinical domains. +Biomni is a general-purpose biomedical AI agent that autonomously executes research tasks across diverse biomedical subfields. Use Biomni to combine large language model reasoning with retrieval-augmented planning and code-based execution for scientific productivity and hypothesis generation. The system operates with an ~11GB biomedical knowledge base covering molecular, genomic, and clinical domains. 
## Quick Start diff --git a/scientific-packages/biopython/SKILL.md b/scientific-packages/biopython/SKILL.md index e4c4e70..930c466 100644 --- a/scientific-packages/biopython/SKILL.md +++ b/scientific-packages/biopython/SKILL.md @@ -1,6 +1,6 @@ --- name: biopython -description: Work with Biopython for computational molecular biology tasks including sequence manipulation, file I/O, alignment analysis, BLAST searches, database access (NCBI/Entrez), protein structure analysis (PDB), phylogenetic tree operations, motif finding, population genetics, and other bioinformatics workflows. This skill should be used when working with biological sequences (DNA, RNA, protein), parsing biological file formats (FASTA, GenBank, FASTQ, PDB, etc.), accessing biological databases, running sequence analyses, or performing structural bioinformatics tasks. +description: "Toolkit for computational molecular biology. Manipulate sequences, parse biological file formats (FASTA, GenBank, FASTQ, PDB), access NCBI databases, run BLAST, analyze structures, build phylogenetic trees, for bioinformatics workflows." --- # Biopython: Computational Molecular Biology in Python diff --git a/scientific-packages/bioservices/SKILL.md b/scientific-packages/bioservices/SKILL.md index ac5a91f..98bb93e 100644 --- a/scientific-packages/bioservices/SKILL.md +++ b/scientific-packages/bioservices/SKILL.md @@ -7,11 +7,11 @@ description: "Access 40+ bio web services (UniProt, KEGG, ChEMBL, PubChem, BLAST ## Overview -BioServices is a Python package providing programmatic access to approximately 40 bioinformatics web services and databases. Use this skill to retrieve biological data, perform cross-database queries, map identifiers, analyze sequences, and integrate multiple biological resources in Python workflows. The package handles both REST and SOAP/WSDL protocols transparently. +BioServices is a Python package providing programmatic access to approximately 40 bioinformatics web services and databases. 
Retrieve biological data, perform cross-database queries, map identifiers, analyze sequences, and integrate multiple biological resources in Python workflows. The package handles both REST and SOAP/WSDL protocols transparently. ## When to Use This Skill -Apply this skill when tasks involve: +This skill should be used when: - Retrieving protein sequences, annotations, or structures from UniProt, PDB, Pfam - Analyzing metabolic pathways and gene functions via KEGG or Reactome - Searching compound databases (ChEBI, ChEMBL, PubChem) for chemical information diff --git a/scientific-packages/cellxgene-census/SKILL.md b/scientific-packages/cellxgene-census/SKILL.md index 6253a2a..3adac4b 100644 --- a/scientific-packages/cellxgene-census/SKILL.md +++ b/scientific-packages/cellxgene-census/SKILL.md @@ -18,7 +18,7 @@ The Census includes: ## When to Use This Skill -This skill should be used when tasks involve: +This skill should be used when: - Querying single-cell expression data by cell type, tissue, or disease - Exploring available single-cell datasets and metadata - Training machine learning models on single-cell data diff --git a/scientific-packages/cobrapy/SKILL.md b/scientific-packages/cobrapy/SKILL.md index 7e150fd..07135c2 100644 --- a/scientific-packages/cobrapy/SKILL.md +++ b/scientific-packages/cobrapy/SKILL.md @@ -7,7 +7,7 @@ description: "Constraint-based metabolic modeling (COBRA). FBA, FVA, gene knocko ## Overview -COBRApy is a Python library for constraint-based reconstruction and analysis (COBRA) of metabolic models, essential for systems biology research. Use this skill to work with genome-scale metabolic models, perform computational simulations of cellular metabolism, conduct metabolic engineering analyses, and predict phenotypic behaviors. +COBRApy is a Python library for constraint-based reconstruction and analysis (COBRA) of metabolic models, essential for systems biology research. 
Work with genome-scale metabolic models, perform computational simulations of cellular metabolism, conduct metabolic engineering analyses, and predict phenotypic behaviors. ## Core Capabilities diff --git a/scientific-packages/dask/SKILL.md b/scientific-packages/dask/SKILL.md index 8110c0a..2903a85 100644 --- a/scientific-packages/dask/SKILL.md +++ b/scientific-packages/dask/SKILL.md @@ -14,9 +14,9 @@ Dask is a Python library for parallel and distributed computing that enables thr Dask scales from laptops (processing ~100 GiB) to clusters (processing ~100 TiB) while maintaining familiar Python APIs. -## When to Use Dask +## When to Use This Skill -Apply this skill when users need to: +This skill should be used when users need to: - Process datasets that exceed available RAM - Scale pandas or NumPy operations to larger datasets - Parallelize computations for performance improvements diff --git a/scientific-packages/datamol/SKILL.md b/scientific-packages/datamol/SKILL.md index 7c5c6c6..4313dd5 100644 --- a/scientific-packages/datamol/SKILL.md +++ b/scientific-packages/datamol/SKILL.md @@ -7,7 +7,7 @@ description: "Pythonic RDKit wrapper for cheminformatics. SMILES parsing, standa ## Overview -Datamol is a Python library that provides a lightweight, Pythonic abstraction layer over RDKit for molecular cheminformatics. It simplifies complex molecular operations with sensible defaults, efficient parallelization, and modern I/O capabilities. All molecular objects are native `rdkit.Chem.Mol` instances, ensuring full compatibility with the RDKit ecosystem. +Datamol is a Python library that provides a lightweight, Pythonic abstraction layer over RDKit for molecular cheminformatics. Simplify complex molecular operations with sensible defaults, efficient parallelization, and modern I/O capabilities. All molecular objects are native `rdkit.Chem.Mol` instances, ensuring full compatibility with the RDKit ecosystem.
**Key capabilities**: - Molecular format conversion (SMILES, SELFIES, InChI) diff --git a/scientific-packages/deepchem/SKILL.md b/scientific-packages/deepchem/SKILL.md index f828948..36484b6 100644 --- a/scientific-packages/deepchem/SKILL.md +++ b/scientific-packages/deepchem/SKILL.md @@ -11,7 +11,7 @@ DeepChem is a comprehensive Python library for applying machine learning to chem ## When to Use This Skill -Apply this skill when: +This skill should be used when: - Loading and processing molecular data (SMILES strings, SDF files, protein sequences) - Predicting molecular properties (solubility, toxicity, binding affinity, ADMET properties) - Training models on chemical/biological datasets diff --git a/scientific-packages/deeptools/SKILL.md b/scientific-packages/deeptools/SKILL.md index 3a0bc48..3ee305c 100644 --- a/scientific-packages/deeptools/SKILL.md +++ b/scientific-packages/deeptools/SKILL.md @@ -7,7 +7,7 @@ description: "NGS analysis toolkit. BAM to bigWig conversion, QC (correlation, P ## Overview -deepTools is a comprehensive suite of Python command-line tools designed for processing and analyzing high-throughput sequencing data. This skill provides guidance for using deepTools to perform quality control, normalize data, compare samples, and generate publication-quality visualizations for ChIP-seq, RNA-seq, ATAC-seq, MNase-seq, and other NGS experiments. +deepTools is a comprehensive suite of Python command-line tools designed for processing and analyzing high-throughput sequencing data. Use deepTools to perform quality control, normalize data, compare samples, and generate publication-quality visualizations for ChIP-seq, RNA-seq, ATAC-seq, MNase-seq, and other NGS experiments. 
**Core capabilities:** - Convert BAM alignments to normalized coverage tracks (bigWig/bedGraph) @@ -18,7 +18,7 @@ deepTools is a comprehensive suite of Python command-line tools designed for pro ## When to Use This Skill -This skill should be used when users request tasks involving: +This skill should be used when users request: - **File conversion**: "Convert BAM to bigWig", "generate coverage tracks", "normalize ChIP-seq data" - **Quality control**: "check ChIP quality", "compare replicates", "assess sequencing depth", "QC analysis" diff --git a/scientific-packages/diffdock/SKILL.md b/scientific-packages/diffdock/SKILL.md index ec1dfb6..8ae09f7 100644 --- a/scientific-packages/diffdock/SKILL.md +++ b/scientific-packages/diffdock/SKILL.md @@ -18,9 +18,9 @@ DiffDock is a diffusion-based deep learning tool for molecular docking that pred **Key Distinction:** DiffDock predicts **binding poses** (3D structure) and **confidence** (prediction certainty), NOT binding affinity (ΔG, Kd). Always combine with scoring functions (GNINA, MM/GBSA) for affinity assessment. -## When to Use DiffDock +## When to Use This Skill -Invoke this skill when users request: +This skill should be used when users request: - "Dock this ligand to a protein" or "predict binding pose" - "Run molecular docking" or "perform protein-ligand docking" diff --git a/scientific-packages/etetoolkit/SKILL.md b/scientific-packages/etetoolkit/SKILL.md index ca83429..174a8ce 100644 --- a/scientific-packages/etetoolkit/SKILL.md +++ b/scientific-packages/etetoolkit/SKILL.md @@ -7,7 +7,7 @@ description: "Phylogenetic tree toolkit (ETE). Tree manipulation (Newick/NHX), e ## Overview -Provide comprehensive support for phylogenetic and hierarchical tree analysis using the ETE (Environment for Tree Exploration) toolkit. Enable tree manipulation, evolutionary analysis, visualization, and integration with biological databases for phylogenomic research and clustering analysis.
+ETE (Environment for Tree Exploration) is a toolkit for phylogenetic and hierarchical tree analysis. Manipulate trees, analyze evolutionary events, visualize results, and integrate with biological databases for phylogenomic research and clustering analysis. ## Core Capabilities diff --git a/scientific-packages/flowio/SKILL.md b/scientific-packages/flowio/SKILL.md index 74c4384..64cfa14 100644 --- a/scientific-packages/flowio/SKILL.md +++ b/scientific-packages/flowio/SKILL.md @@ -7,11 +7,11 @@ description: "Parse FCS (Flow Cytometry Standard) files v2.0-3.1. Extract events ## Overview -FlowIO is a lightweight Python library for reading and writing Flow Cytometry Standard (FCS) files. It excels at parsing FCS metadata, extracting event data, and creating new FCS files with minimal dependencies. The library supports FCS versions 2.0, 3.0, and 3.1, making it ideal for backend services, data pipelines, and basic cytometry file operations. +FlowIO is a lightweight Python library for reading and writing Flow Cytometry Standard (FCS) files. Parse FCS metadata, extract event data, and create new FCS files with minimal dependencies. The library supports FCS versions 2.0, 3.0, and 3.1, making it ideal for backend services, data pipelines, and basic cytometry file operations. ## When to Use This Skill -Apply this skill when working with: +This skill should be used when working with: - FCS files requiring parsing or metadata extraction - Flow cytometry data needing conversion to NumPy arrays diff --git a/scientific-packages/gget/SKILL.md b/scientific-packages/gget/SKILL.md index 3f74d73..8c8d861 100644 --- a/scientific-packages/gget/SKILL.md +++ b/scientific-packages/gget/SKILL.md @@ -7,7 +7,7 @@ description: "Bioinformatics query toolkit. Gene info (Ensembl/UniProt), BLAST, ## Overview -gget is a command-line bioinformatics tool and Python package providing unified access to 20+ genomic databases and analysis methods.
Execute queries for gene information, sequence analysis, protein structures, expression data, and disease associations through a consistent interface. All gget modules work both as command-line tools and as Python functions. +gget is a command-line bioinformatics tool and Python package providing unified access to 20+ genomic databases and analysis methods. Query gene information, sequence analysis, protein structures, expression data, and disease associations through a consistent interface. All gget modules work both as command-line tools and as Python functions. **Important**: The databases queried by gget are continuously updated, which sometimes changes their structure. gget modules are tested automatically on a biweekly basis and updated to match new database structures when necessary. diff --git a/scientific-packages/matchms/SKILL.md b/scientific-packages/matchms/SKILL.md index fad33b9..daa1fdd 100644 --- a/scientific-packages/matchms/SKILL.md +++ b/scientific-packages/matchms/SKILL.md @@ -7,7 +7,7 @@ description: "Mass spectrometry analysis. Process mzML/MGF/MSP, spectral similar ## Overview -Matchms is an open-source Python library for mass spectrometry data processing and analysis. It provides tools for importing spectra from various formats, standardizing metadata, filtering peaks, calculating spectral similarities, and building reproducible analytical workflows. The library democratizes mass spectrometry informatics through accessible, standardized Python tools. +Matchms is an open-source Python library for mass spectrometry data processing and analysis. Import spectra from various formats, standardize metadata, filter peaks, calculate spectral similarities, and build reproducible analytical workflows. 
## Core Capabilities diff --git a/scientific-packages/matplotlib/SKILL.md b/scientific-packages/matplotlib/SKILL.md index 76d584d..032fb51 100644 --- a/scientific-packages/matplotlib/SKILL.md +++ b/scientific-packages/matplotlib/SKILL.md @@ -11,7 +11,7 @@ Matplotlib is Python's foundational visualization library for creating static, a ## When to Use This Skill -Apply this skill when: +This skill should be used when: - Creating any type of plot or chart (line, scatter, bar, histogram, heatmap, contour, etc.) - Generating scientific or statistical visualizations - Customizing plot appearance (colors, styles, labels, legends) diff --git a/scientific-packages/medchem/SKILL.md b/scientific-packages/medchem/SKILL.md index 71cf197..b1091d5 100644 --- a/scientific-packages/medchem/SKILL.md +++ b/scientific-packages/medchem/SKILL.md @@ -7,9 +7,17 @@ description: "Medicinal chemistry filters. Apply drug-likeness rules (Lipinski, ## Overview -Medchem is a Python library for molecular filtering and prioritization in drug discovery workflows. It provides hundreds of well-established and novel molecular filters, structural alerts, and medicinal chemistry rules to efficiently triage and prioritize compound libraries at scale. +Medchem is a Python library for molecular filtering and prioritization in drug discovery workflows. Apply hundreds of well-established and novel molecular filters, structural alerts, and medicinal chemistry rules to efficiently triage and prioritize compound libraries at scale. Rules and filters are context-specific—use as guidelines combined with domain expertise. -**Key Principle:** Rules and filters are always context-specific. Avoid blindly applying filters—marketed drugs often don't pass standard medchem filters, and prodrugs may intentionally violate rules. Use these tools as guidelines combined with domain expertise. +## When to Use This Skill + +This skill should be used when: +- Applying drug-likeness rules (Lipinski, Veber, etc.) 
to compound libraries +- Filtering molecules by structural alerts or PAINS patterns +- Prioritizing compounds for lead optimization +- Assessing compound quality and medicinal chemistry properties +- Detecting reactive or problematic functional groups +- Calculating molecular complexity metrics ## Installation diff --git a/scientific-packages/molfeat/SKILL.md b/scientific-packages/molfeat/SKILL.md index e6f0779..e7e278f 100644 --- a/scientific-packages/molfeat/SKILL.md +++ b/scientific-packages/molfeat/SKILL.md @@ -7,18 +7,11 @@ description: "Molecular featurization for ML (100+ featurizers). ECFP, MACCS, de ## Overview -Molfeat is a comprehensive Python library for molecular featurization that unifies pre-trained embeddings and hand-crafted featurizers into a single, fast, and user-friendly package. Convert chemical structures (SMILES strings or RDKit molecules) into numerical representations suitable for machine learning tasks including QSAR modeling, virtual screening, similarity searching, and deep learning applications. - -**Key Capabilities:** -- 100+ featurizers including fingerprints, descriptors, and pretrained models -- Fast parallel processing with simple API -- Scikit-learn compatible transformers -- Built-in caching and state persistence -- Integration with PyTorch, TensorFlow, and graph neural networks +Molfeat is a comprehensive Python library for molecular featurization that unifies 100+ pre-trained embeddings and hand-crafted featurizers. Convert chemical structures (SMILES strings or RDKit molecules) into numerical representations for machine learning tasks including QSAR modeling, virtual screening, similarity searching, and deep learning applications. Features fast parallel processing, scikit-learn compatible transformers, and built-in caching. 
## When to Use This Skill -Apply molfeat when working with: +This skill should be used when working with: - **Molecular machine learning**: Building QSAR/QSPR models, property prediction - **Virtual screening**: Ranking compound libraries for biological activity - **Similarity searching**: Finding structurally similar molecules diff --git a/scientific-packages/polars/SKILL.md b/scientific-packages/polars/SKILL.md index 10e7851..ad68833 100644 --- a/scientific-packages/polars/SKILL.md +++ b/scientific-packages/polars/SKILL.md @@ -7,7 +7,7 @@ description: "Fast DataFrame library (Apache Arrow). Select, filter, group_by, j ## Overview -Polars is a lightning-fast DataFrame library for Python (and Rust) built on Apache Arrow. This skill provides guidance for working with Polars, including its expression-based API, lazy evaluation framework, and high-performance data manipulation capabilities. Use this skill when helping users write efficient data processing code, migrate from pandas, or optimize data pipelines. +Polars is a lightning-fast DataFrame library for Python and Rust built on Apache Arrow. Work with Polars' expression-based API, lazy evaluation framework, and high-performance data manipulation capabilities for efficient data processing, pandas migration, and data pipeline optimization. ## Quick Start diff --git a/scientific-packages/pydeseq2/SKILL.md b/scientific-packages/pydeseq2/SKILL.md index 548aceb..56cbfdf 100644 --- a/scientific-packages/pydeseq2/SKILL.md +++ b/scientific-packages/pydeseq2/SKILL.md @@ -7,15 +7,7 @@ description: "Differential gene expression analysis (Python DESeq2). Identify DE ## Overview -PyDESeq2 is a Python implementation of the DESeq2 method for differential expression analysis (DEA) with bulk RNA-seq data. This skill provides comprehensive support for designing and executing PyDESeq2 workflows, from data loading through result interpretation. 
- -**Key capabilities:** -- Single-factor and multi-factor experimental designs -- Statistical testing using Wald tests with multiple testing correction -- Optional apeGLM log-fold-change shrinkage -- Data preprocessing and quality control -- Result export and visualization -- Integration with pandas, AnnData, and the Python data science ecosystem +PyDESeq2 is a Python implementation of DESeq2 for differential expression analysis with bulk RNA-seq data. Design and execute complete workflows from data loading through result interpretation, including single-factor and multi-factor designs, Wald tests with multiple testing correction, optional apeGLM shrinkage, and integration with pandas and AnnData. ## When to Use This Skill diff --git a/scientific-packages/pymatgen/SKILL.md b/scientific-packages/pymatgen/SKILL.md index 9971795..05dd129 100644 --- a/scientific-packages/pymatgen/SKILL.md +++ b/scientific-packages/pymatgen/SKILL.md @@ -7,15 +7,7 @@ description: "Materials science toolkit. Crystal structures (CIF, POSCAR), phase ## Overview -Pymatgen is a comprehensive Python library for materials analysis that powers the Materials Project. 
This skill provides guidance for using pymatgen's extensive capabilities in computational materials science, including: - -- **Structure manipulation**: Creating, reading, writing, and transforming crystal structures and molecules -- **Materials analysis**: Symmetry, coordination environments, bonding, and structure comparison -- **Thermodynamics**: Phase diagrams, Pourbaix diagrams, reaction energies, and stability analysis -- **Electronic structure**: Band structures, density of states, and Fermi surfaces -- **Surfaces and interfaces**: Slab generation, Wulff shapes, adsorption sites, and interface construction -- **Materials Project integration**: Programmatic access to hundreds of thousands of computed materials -- **File I/O**: Support for 100+ file formats from various computational codes +Pymatgen is a comprehensive Python library for materials analysis that powers the Materials Project. Create, analyze, and manipulate crystal structures and molecules, compute phase diagrams and thermodynamic properties, analyze electronic structure (band structures, DOS), generate surfaces and interfaces, and access Materials Project's database of computed materials. Supports 100+ file formats from various computational codes. ## When to Use This Skill diff --git a/scientific-packages/pymc/SKILL.md b/scientific-packages/pymc/SKILL.md index c17a71d..1d30dfd 100644 --- a/scientific-packages/pymc/SKILL.md +++ b/scientific-packages/pymc/SKILL.md @@ -7,7 +7,7 @@ description: "Bayesian modeling with PyMC. Build hierarchical models, MCMC (NUTS ## Overview -PyMC is a Python library for Bayesian modeling and probabilistic programming. This skill provides comprehensive guidance for building, fitting, validating, and comparing Bayesian models using PyMC's modern API (version 5.x+). It includes workflows for common model types, diagnostic procedures, and best practices for Bayesian inference. +PyMC is a Python library for Bayesian modeling and probabilistic programming. 
Build, fit, validate, and compare Bayesian models using PyMC's modern API (version 5.x+), including hierarchical models, MCMC sampling (NUTS), variational inference, and model comparison (LOO, WAIC). ## When to Use This Skill diff --git a/scientific-packages/pymoo/SKILL.md b/scientific-packages/pymoo/SKILL.md index ae2160d..96d9a12 100644 --- a/scientific-packages/pymoo/SKILL.md +++ b/scientific-packages/pymoo/SKILL.md @@ -7,11 +7,11 @@ description: "Multi-objective optimization framework. NSGA-II, NSGA-III, MOEA/D, ## Overview -Pymoo is a comprehensive Python framework for solving optimization problems with emphasis on multi-objective optimization. The library provides state-of-the-art single-objective and multi-objective algorithms, extensive benchmark problems, customizable genetic operators, advanced visualization tools, and multi-criteria decision making methods. Pymoo excels at finding trade-off solutions (Pareto fronts) for problems with conflicting objectives. +Pymoo is a comprehensive Python framework for optimization with emphasis on multi-objective problems. Solve single and multi-objective optimization using state-of-the-art algorithms (NSGA-II/III, MOEA/D), benchmark problems (ZDT, DTLZ), customizable genetic operators, and multi-criteria decision making methods. Excels at finding trade-off solutions (Pareto fronts) for problems with conflicting objectives. ## When to Use This Skill -Apply this skill when: +This skill should be used when: - Solving optimization problems with one or multiple objectives - Finding Pareto-optimal solutions and analyzing trade-offs - Implementing evolutionary algorithms (GA, DE, PSO, NSGA-II/III) diff --git a/scientific-packages/pyopenms/SKILL.md b/scientific-packages/pyopenms/SKILL.md index 0358816..65651fa 100644 --- a/scientific-packages/pyopenms/SKILL.md +++ b/scientific-packages/pyopenms/SKILL.md @@ -7,9 +7,17 @@ description: "Mass spectrometry toolkit (OpenMS Python). 
Process mzML/mzXML, pea ## Overview -pyOpenMS is an open-source Python library providing comprehensive tools for mass spectrometry data analysis in proteomics and metabolomics research. It offers Python bindings to the OpenMS C++ library, enabling efficient processing of LC-MS/MS data, peptide identification, feature detection, quantification, and integration with common proteomics tools like Comet, Mascot, MSGF+, Percolator, and MSstats. +pyOpenMS is an open-source Python library for mass spectrometry data analysis in proteomics and metabolomics. Process LC-MS/MS data, perform peptide identification, detect and quantify features, and integrate with common proteomics tools (Comet, Mascot, MSGF+, Percolator, MSstats) using Python bindings to the OpenMS C++ library. -Use this skill when working with mass spectrometry data analysis tasks, processing proteomics or metabolomics datasets, or implementing computational workflows for biomolecular identification and quantification. +## When to Use This Skill + +This skill should be used when: +- Processing mass spectrometry data (mzML, mzXML files) +- Performing peak picking and feature detection in LC-MS data +- Conducting peptide and protein identification workflows +- Quantifying metabolites or proteins +- Integrating proteomics or metabolomics tools into Python pipelines +- Working with OpenMS tools and file formats ## Core Capabilities diff --git a/scientific-packages/pysam/SKILL.md b/scientific-packages/pysam/SKILL.md index 9b894d5..33ae787 100644 --- a/scientific-packages/pysam/SKILL.md +++ b/scientific-packages/pysam/SKILL.md @@ -7,18 +7,11 @@ description: "Genomic file toolkit. Read/write SAM/BAM/CRAM alignments, VCF/BCF ## Overview -Pysam is a Python module for reading, manipulating, and writing genomic datasets. It provides a Pythonic interface to the htslib C-API, supporting multiple genomic file formats commonly used in bioinformatics and computational biology. 
+Pysam is a Python module for reading, manipulating, and writing genomic datasets. Read/write SAM/BAM/CRAM alignment files, VCF/BCF variant files, and FASTA/FASTQ sequences with a Pythonic interface to htslib. Query tabix-indexed files, perform pileup analysis for coverage, and execute samtools/bcftools commands. -**Key capabilities:** -- Read/write SAM/BAM/CRAM alignment files (aligned sequencing reads) -- Read/write VCF/BCF variant call files (genetic variants) -- Access FASTA reference sequences with random access -- Read FASTQ files (raw sequencing reads with quality scores) -- Query tabix-indexed files (BED, GTF, GFF) -- Perform pileup analysis for coverage calculations -- Execute samtools and bcftools commands from Python +## When to Use This Skill -**When to use this skill:** +This skill should be used when: - Working with sequencing alignment files (BAM/CRAM) - Analyzing genetic variants (VCF/BCF) - Extracting reference sequences or gene regions diff --git a/scientific-packages/pytdc/SKILL.md b/scientific-packages/pytdc/SKILL.md index 1895924..2970d53 100644 --- a/scientific-packages/pytdc/SKILL.md +++ b/scientific-packages/pytdc/SKILL.md @@ -7,9 +7,18 @@ description: "Therapeutics Data Commons. AI-ready drug discovery datasets (ADME, ## Overview -PyTDC is an open-science platform providing AI-ready datasets and benchmarks for drug discovery and development. It offers curated datasets spanning the entire therapeutics pipeline, from target discovery through clinical development, with standardized evaluation metrics and meaningful data splits. +PyTDC is an open-science platform providing AI-ready datasets and benchmarks for drug discovery and development. 
Access curated datasets spanning the entire therapeutics pipeline with standardized evaluation metrics and meaningful data splits, organized into three categories: single-instance prediction (molecular/protein properties), multi-instance prediction (drug-target interactions, DDI), and generation (molecule generation, retrosynthesis). -The platform organizes therapeutic tasks into three major categories: single-instance prediction for properties of individual biomedical entities, multi-instance prediction for relationships between multiple entities, and generation for creating new therapeutic molecules. +## When to Use This Skill + +This skill should be used when: +- Working with drug discovery or therapeutic ML datasets +- Benchmarking machine learning models on standardized pharmaceutical tasks +- Predicting molecular properties (ADME, toxicity, bioactivity) +- Predicting drug-target or drug-drug interactions +- Generating novel molecules with desired properties +- Accessing curated datasets with proper train/test splits (scaffold, cold-split) +- Using molecular oracles for property optimization ## Installation & Setup diff --git a/scientific-packages/pytorch-lightning/SKILL.md b/scientific-packages/pytorch-lightning/SKILL.md index 6fd1981..82b700c 100644 --- a/scientific-packages/pytorch-lightning/SKILL.md +++ b/scientific-packages/pytorch-lightning/SKILL.md @@ -1,13 +1,23 @@ --- name: pytorch-lightning -description: Work with PyTorch Lightning for deep learning model training and research. This skill should be used when building, training, or deploying neural networks using PyTorch Lightning, organizing PyTorch code into LightningModules, configuring Trainers for multi-GPU/TPU training, implementing data pipelines with LightningDataModules, or working with callbacks, logging, and distributed training strategies (DDP, FSDP, DeepSpeed). +description: "Deep learning framework (PyTorch Lightning). 
Organize PyTorch code into LightningModules, configure Trainers for multi-GPU/TPU, implement data pipelines, callbacks, logging (W&B, TensorBoard), distributed training (DDP, FSDP, DeepSpeed), for scalable neural network training." --- # PyTorch Lightning ## Overview -PyTorch Lightning is a deep learning framework that organizes PyTorch code to eliminate boilerplate while maintaining full flexibility. It automates training workflows, multi-device orchestration, and best practices from research labs. Use this skill when working with neural network training, scaling models across multiple GPUs/TPUs, or structuring deep learning projects professionally. +PyTorch Lightning is a deep learning framework that organizes PyTorch code to eliminate boilerplate while maintaining full flexibility. Automate training workflows, multi-device orchestration, and implement best practices for neural network training and scaling across multiple GPUs/TPUs. + +## When to Use This Skill + +This skill should be used when: +- Building, training, or deploying neural networks using PyTorch Lightning +- Organizing PyTorch code into LightningModules +- Configuring Trainers for multi-GPU/TPU training +- Implementing data pipelines with LightningDataModules +- Working with callbacks, logging, and distributed training strategies (DDP, FSDP, DeepSpeed) +- Structuring deep learning projects professionally ## Core Capabilities diff --git a/scientific-packages/scanpy/SKILL.md b/scientific-packages/scanpy/SKILL.md index 0ebdb45..3c5c51c 100644 --- a/scientific-packages/scanpy/SKILL.md +++ b/scientific-packages/scanpy/SKILL.md @@ -7,11 +7,11 @@ description: "Single-cell RNA-seq analysis. Load .h5ad/10X data, QC, normalizati ## Overview -This skill provides comprehensive support for analyzing single-cell RNA-seq data using scanpy, a scalable Python toolkit built on AnnData. 
Use this skill for complete single-cell workflows including quality control, normalization, dimensionality reduction, clustering, marker gene identification, visualization, and trajectory analysis. +Scanpy is a scalable Python toolkit for analyzing single-cell RNA-seq data, built on AnnData. Apply this skill for complete single-cell workflows including quality control, normalization, dimensionality reduction, clustering, marker gene identification, visualization, and trajectory analysis. ## When to Use This Skill -Activate this skill when: +This skill should be used when: - Analyzing single-cell RNA-seq data (.h5ad, 10X, CSV formats) - Performing quality control on scRNA-seq datasets - Creating UMAP, t-SNE, or PCA visualizations diff --git a/scientific-packages/scikit-bio/SKILL.md b/scientific-packages/scikit-bio/SKILL.md index 377162d..ee1afa0 100644 --- a/scientific-packages/scikit-bio/SKILL.md +++ b/scientific-packages/scikit-bio/SKILL.md @@ -7,9 +7,7 @@ description: "Biological data toolkit. Sequence analysis, alignments, phylogenet ## Overview -scikit-bio is a comprehensive Python library for working with biological data. Provide assistance with bioinformatics analyses spanning sequence manipulation, alignment, phylogenetics, microbial ecology, and multivariate statistics. This skill enables efficient work with common biological file formats and computational workflows in genomics, metagenomics, and ecological research. - -**Key applications:** Sequence analysis, phylogenetic tree construction, microbiome diversity analysis, ecological statistics, biological data manipulation, and format conversion. +scikit-bio is a comprehensive Python library for working with biological data. Apply this skill for bioinformatics analyses spanning sequence manipulation, alignment, phylogenetics, microbial ecology, and multivariate statistics. 
## When to Use This Skill diff --git a/scientific-packages/scikit-learn/SKILL.md b/scientific-packages/scikit-learn/SKILL.md index 57bdf78..ab3ba9a 100644 --- a/scientific-packages/scikit-learn/SKILL.md +++ b/scientific-packages/scikit-learn/SKILL.md @@ -7,11 +7,11 @@ description: "ML toolkit. Classification, regression, clustering, PCA, preproces ## Overview -This skill provides comprehensive guidance for using scikit-learn, Python's premier machine learning library. Scikit-learn offers simple, efficient tools for predictive data analysis, including classification, regression, clustering, dimensionality reduction, model selection, and preprocessing. This skill should be used when implementing machine learning workflows, building predictive models, analyzing datasets using supervised or unsupervised learning, preprocessing data for ML tasks, evaluating model performance, or optimizing hyperparameters. +Scikit-learn is Python's premier machine learning library, offering simple and efficient tools for predictive data analysis. Apply this skill for classification, regression, clustering, dimensionality reduction, model selection, preprocessing, and hyperparameter optimization. ## When to Use This Skill -Activate this skill when: +This skill should be used when: - Building classification models (spam detection, image recognition, medical diagnosis) - Creating regression models (price prediction, forecasting, trend analysis) - Performing clustering analysis (customer segmentation, pattern discovery) diff --git a/scientific-packages/seaborn/SKILL.md b/scientific-packages/seaborn/SKILL.md index 219cf6c..82ab3e3 100644 --- a/scientific-packages/seaborn/SKILL.md +++ b/scientific-packages/seaborn/SKILL.md @@ -7,7 +7,7 @@ description: "Statistical visualization. Scatter, box, violin, heatmaps, pair pl ## Overview -Seaborn is a Python visualization library providing a high-level, dataset-oriented interface for creating publication-quality statistical graphics. 
Built on matplotlib, seaborn emphasizes declarative syntax that allows focus on data relationships rather than visual implementation details. The library excels at multivariate analysis, automatic statistical estimation, and creating complex multi-panel figures with minimal code. +Seaborn is a Python visualization library for creating publication-quality statistical graphics. Use this skill for dataset-oriented plotting, multivariate analysis, automatic statistical estimation, and complex multi-panel figures with minimal code. ## Design Philosophy diff --git a/scientific-packages/statsmodels/SKILL.md b/scientific-packages/statsmodels/SKILL.md index 3c0ff09..909a2ae 100644 --- a/scientific-packages/statsmodels/SKILL.md +++ b/scientific-packages/statsmodels/SKILL.md @@ -7,11 +7,11 @@ description: "Statistical modeling toolkit. OLS, GLM, logistic, ARIMA, time seri ## Overview -Statsmodels is Python's premier library for statistical modeling, providing tools for estimation, inference, and diagnostics across a wide range of statistical methods. This skill provides comprehensive guidance for conducting rigorous statistical analysis, from simple linear regression to complex time series models and econometric analyses. +Statsmodels is Python's premier library for statistical modeling, providing tools for estimation, inference, and diagnostics across a wide range of statistical methods. Apply this skill for rigorous statistical analysis, from simple linear regression to complex time series models and econometric analyses. ## When to Use This Skill -Activate this skill when: +This skill should be used when: - Fitting regression models (OLS, WLS, GLS, quantile regression) - Performing generalized linear modeling (logistic, Poisson, Gamma, etc.) 
- Analyzing discrete outcomes (binary, multinomial, count, ordinal) diff --git a/scientific-packages/torch_geometric/SKILL.md b/scientific-packages/torch_geometric/SKILL.md index ea263c6..b320fc3 100644 --- a/scientific-packages/torch_geometric/SKILL.md +++ b/scientific-packages/torch_geometric/SKILL.md @@ -7,7 +7,7 @@ description: "Graph Neural Networks (PyG). Node/graph classification, link predi ## Overview -PyTorch Geometric is a library built on PyTorch that enables development and training of Graph Neural Networks (GNNs) for applications involving structured data. It provides comprehensive tools for deep learning on graphs and other irregular structures (geometric deep learning), including mini-batch processing, multi-GPU support, and extensive benchmark datasets. +PyTorch Geometric is a library built on PyTorch for developing and training Graph Neural Networks (GNNs). Apply this skill for deep learning on graphs and irregular structures, including mini-batch processing, multi-GPU training, and geometric deep learning applications. ## When to Use This Skill diff --git a/scientific-packages/torchdrug/SKILL.md b/scientific-packages/torchdrug/SKILL.md index c56ab40..e504ac8 100644 --- a/scientific-packages/torchdrug/SKILL.md +++ b/scientific-packages/torchdrug/SKILL.md @@ -1,26 +1,17 @@ --- name: torchdrug -description: Toolkit for graph-based drug discovery and molecular machine learning using TorchDrug. This skill should be used when working with molecular property prediction, protein modeling, knowledge graph reasoning, molecular generation, retrosynthesis prediction, or implementing graph neural networks for drug discovery and chemical/biological data. Use when tasks involve SMILES strings, molecular graphs, protein structures, drug datasets, binding prediction, or any PyTorch-based graph ML for life sciences. +description: "Graph-based drug discovery toolkit. 
Molecular property prediction (ADMET), protein modeling, knowledge graph reasoning, molecular generation, retrosynthesis, GNNs (GIN, GAT, SchNet), 40+ datasets, for PyTorch-based ML on molecules, proteins, and biomedical graphs." --- # TorchDrug ## Overview -TorchDrug is a comprehensive PyTorch-based machine learning toolbox for drug discovery and molecular science. It provides graph neural networks, pre-trained models, datasets, and task definitions for working with molecules, proteins, and biological knowledge graphs. - -**Core Capabilities:** -- Molecular property prediction (drug-likeness, toxicity, quantum properties) -- Protein modeling (function, structure, interactions) -- Knowledge graph reasoning (drug-disease associations, biomedical knowledge) -- Molecular generation (de novo drug design) -- Retrosynthesis planning (synthetic route prediction) -- 40+ curated datasets for chemistry and biology -- 20+ state-of-the-art model architectures +TorchDrug is a comprehensive PyTorch-based machine learning toolbox for drug discovery and molecular science. Apply graph neural networks, pre-trained models, and task definitions to molecules, proteins, and biological knowledge graphs for molecular property prediction, protein modeling, knowledge graph reasoning, molecular generation, and retrosynthesis planning. Includes 40+ curated datasets and 20+ model architectures. 
## When to Use This Skill -Use this skill when working with: +This skill should be used when working with: **Data Types:** - SMILES strings or molecular structures diff --git a/scientific-packages/transformers/SKILL.md b/scientific-packages/transformers/SKILL.md index a5fdf15..31101b9 100644 --- a/scientific-packages/transformers/SKILL.md +++ b/scientific-packages/transformers/SKILL.md @@ -7,9 +7,7 @@ description: Work with state-of-the-art machine learning models for NLP, compute ## Overview -The Transformers library provides state-of-the-art machine learning models for natural language processing (NLP), computer vision, audio processing, and multimodal tasks. It offers over 1 million pre-trained model checkpoints and supports quick inference through pipelines, comprehensive training via the Trainer API, and flexible text generation with various decoding strategies. - -This skill provides comprehensive guidance on working with Transformers across all major task types and modalities. +The Transformers library provides state-of-the-art machine learning models for NLP, computer vision, audio, and multimodal tasks. Apply this skill for quick inference through pipelines, comprehensive training via the Trainer API, and flexible text generation with various decoding strategies. ## Core Capabilities diff --git a/scientific-packages/umap-learn/SKILL.md b/scientific-packages/umap-learn/SKILL.md index 9f38ffa..3c3768b 100644 --- a/scientific-packages/umap-learn/SKILL.md +++ b/scientific-packages/umap-learn/SKILL.md @@ -7,15 +7,7 @@ description: "UMAP dimensionality reduction. Fast nonlinear manifold learning fo ## Overview -UMAP (Uniform Manifold Approximation and Projection) is a dimensionality reduction technique designed for both visualization and general non-linear dimensionality reduction. It is faster than t-SNE while producing comparable or superior results, and uniquely scales well to higher embedding dimensions (beyond 2D/3D). 
UMAP preserves both local and global structure in data and supports supervised learning, making it versatile for visualization, clustering preprocessing, and feature engineering. - -**Key capabilities:** -- Fast, scalable dimensionality reduction for visualization -- Supervised and semi-supervised learning with label information -- Effective preprocessing for density-based clustering (HDBSCAN) -- Transform new data using trained models -- Parametric embeddings via neural networks -- Inverse transforms for data reconstruction +UMAP (Uniform Manifold Approximation and Projection) is a dimensionality reduction technique for visualization and general non-linear dimensionality reduction. Apply this skill for fast, scalable embeddings that preserve local and global structure, supervised learning, and clustering preprocessing. ## Quick Start diff --git a/scientific-packages/zarr-python/SKILL.md b/scientific-packages/zarr-python/SKILL.md index d3c1b0b..a0aeff8 100644 --- a/scientific-packages/zarr-python/SKILL.md +++ b/scientific-packages/zarr-python/SKILL.md @@ -7,15 +7,7 @@ description: "Chunked N-D arrays for cloud storage. Compressed arrays, parallel ## Overview -Zarr is a Python library for storage of large N-dimensional arrays that are chunked and compressed. It provides a NumPy-like API but divides data into manageable chunks stored separately, enabling efficient parallel I/O, cloud-native workflows, and seamless integration with the scientific Python ecosystem (NumPy, Dask, Xarray). - -**Key capabilities:** -- Create and manipulate N-dimensional arrays with NumPy-like semantics -- Configure chunking strategies for optimal parallel access and performance -- Apply compression algorithms (Blosc, Zstandard, Gzip, etc.) 
to reduce storage -- Use flexible storage backends: local filesystem, memory, ZIP files, or cloud storage (S3, GCS) -- Organize data hierarchically using groups (similar to HDF5) -- Integrate seamlessly with Dask for parallel computing and Xarray for labeled arrays +Zarr is a Python library for storing large N-dimensional arrays with chunking and compression. Apply this skill for efficient parallel I/O, cloud-native workflows, and seamless integration with NumPy, Dask, and Xarray. ## Quick Start diff --git a/scientific-thinking/document-skills/docx/SKILL.md b/scientific-thinking/document-skills/docx/SKILL.md index c2d78c9..3fb8086 100644 --- a/scientific-thinking/document-skills/docx/SKILL.md +++ b/scientific-thinking/document-skills/docx/SKILL.md @@ -8,7 +8,7 @@ license: Proprietary. LICENSE.txt has complete terms ## Overview -This skill supports creating, editing, or analyzing the contents of .docx files. A .docx file is essentially a ZIP archive containing XML files and other resources. Different tools and workflows are available for different tasks. +A .docx file is a ZIP archive containing XML files and resources. Create, edit, or analyze Word documents using text extraction, raw XML access, or redlining workflows. Apply this skill for professional document processing, tracked changes, and content manipulation. ## Workflow Decision Tree diff --git a/scientific-thinking/document-skills/pdf/SKILL.md b/scientific-thinking/document-skills/pdf/SKILL.md index 18bd9be..8ca8ea5 100644 --- a/scientific-thinking/document-skills/pdf/SKILL.md +++ b/scientific-thinking/document-skills/pdf/SKILL.md @@ -8,7 +8,7 @@ license: Proprietary. LICENSE.txt has complete terms ## Overview -This guide covers essential PDF processing operations using Python libraries and command-line tools. For advanced features, JavaScript libraries, and detailed examples, see reference.md. If you need to fill out a PDF form, read forms.md and follow its instructions. 
+Extract text and tables, create PDFs, merge or split files, and fill forms using Python libraries and command-line tools. Apply this skill for programmatic document processing and analysis. For advanced features, JavaScript libraries, and detailed examples, see reference.md; to fill out a PDF form, read forms.md and follow its instructions. ## Quick Start diff --git a/scientific-thinking/document-skills/pptx/SKILL.md b/scientific-thinking/document-skills/pptx/SKILL.md index 1493ea4..8ab2db9 100644 --- a/scientific-thinking/document-skills/pptx/SKILL.md +++ b/scientific-thinking/document-skills/pptx/SKILL.md @@ -8,7 +8,7 @@ license: Proprietary. LICENSE.txt has complete terms ## Overview -This skill supports creating, editing, or analyzing the contents of .pptx files. A .pptx file is essentially a ZIP archive containing XML files and other resources. Different tools and workflows are available for different tasks. +A .pptx file is a ZIP archive containing XML files and resources. Create, edit, or analyze PowerPoint presentations using text extraction, raw XML access, or html2pptx workflows. Apply this skill for programmatic presentation creation and modification. ## Reading and analyzing content diff --git a/scientific-thinking/document-skills/xlsx/SKILL.md b/scientific-thinking/document-skills/xlsx/SKILL.md index 86422e0..b3776a1 100644 --- a/scientific-thinking/document-skills/xlsx/SKILL.md +++ b/scientific-thinking/document-skills/xlsx/SKILL.md @@ -64,7 +64,7 @@ Unless otherwise stated by the user or existing template ## Overview -A user may ask you to create, edit, or analyze the contents of an .xlsx file. You have different tools and workflows available for different tasks. +Create, edit, or analyze Excel spreadsheets with formulas, formatting, and data analysis. Apply this skill for spreadsheet processing using openpyxl and pandas. Recalculate formulas and ensure zero errors for publication-quality outputs. 
## Important Requirements diff --git a/scientific-thinking/exploratory-data-analysis/SKILL.md b/scientific-thinking/exploratory-data-analysis/SKILL.md index aa461a4..c26ff15 100644 --- a/scientific-thinking/exploratory-data-analysis/SKILL.md +++ b/scientific-thinking/exploratory-data-analysis/SKILL.md @@ -7,7 +7,7 @@ description: "EDA toolkit. Analyze CSV/Excel/JSON/Parquet files, statistical sum ## Overview -Perform comprehensive exploratory data analysis on datasets of any format. This skill acts as a proficient data scientist, automatically analyzing data to generate meaningful summaries, advanced statistics, visualizations, and actionable insights. All textual outputs are generated as markdown for seamless integration into workflows. +EDA is a process for discovering patterns, anomalies, and relationships in data. Analyze CSV/Excel/JSON/Parquet files to generate statistical summaries, distributions, correlations, outliers, and visualizations. All outputs are markdown-formatted for integration into workflows. ## When to Use This Skill diff --git a/scientific-thinking/hypothesis-generation/SKILL.md b/scientific-thinking/hypothesis-generation/SKILL.md index 01f4296..3308691 100644 --- a/scientific-thinking/hypothesis-generation/SKILL.md +++ b/scientific-thinking/hypothesis-generation/SKILL.md @@ -7,7 +7,17 @@ description: "Generate testable hypotheses. Formulate from observations, design ## Overview -Generate rigorous, evidence-based scientific hypotheses that are testable, falsifiable, and grounded in existing literature. This skill provides a systematic workflow for transforming observations into well-structured hypotheses with experimental designs and testable predictions. +Hypothesis generation is a systematic process for developing testable explanations. Formulate evidence-based hypotheses from observations, design experiments, explore competing explanations, and develop predictions. Apply this skill for scientific inquiry across domains. 
+ +## When to Use This Skill + +This skill should be used when: +- Developing hypotheses from observations or preliminary data +- Designing experiments to test scientific questions +- Exploring competing explanations for phenomena +- Formulating testable predictions for research +- Conducting literature-based hypothesis generation +- Planning mechanistic studies across scientific domains ## Workflow diff --git a/scientific-thinking/peer-review/SKILL.md b/scientific-thinking/peer-review/SKILL.md index cdc2660..cc3d496 100644 --- a/scientific-thinking/peer-review/SKILL.md +++ b/scientific-thinking/peer-review/SKILL.md @@ -7,7 +7,18 @@ description: "Systematic peer review toolkit. Evaluate methodology, statistics, ## Overview -This skill enables comprehensive, high-quality peer review of scientific manuscripts across all disciplines. The approach emphasizes constructive criticism, methodological rigor, reproducibility, and clarity, following best practices from leading journals and peer review guidelines. +Peer review is a systematic process for evaluating scientific manuscripts. Assess methodology, statistics, design, reproducibility, ethics, and reporting standards. Apply this skill for manuscript and grant review across disciplines with constructive, rigorous evaluation. 
+ +## When to Use This Skill + +This skill should be used when: +- Conducting peer review of scientific manuscripts for journals +- Evaluating grant proposals and research applications +- Assessing methodology and experimental design rigor +- Reviewing statistical analyses and reporting standards +- Evaluating reproducibility and data availability +- Checking compliance with reporting guidelines (CONSORT, STROBE, PRISMA) +- Providing constructive feedback on scientific writing ## Peer Review Workflow diff --git a/scientific-thinking/scientific-brainstorming/SKILL.md b/scientific-thinking/scientific-brainstorming/SKILL.md index 3ef285d..8bd52ef 100644 --- a/scientific-thinking/scientific-brainstorming/SKILL.md +++ b/scientific-thinking/scientific-brainstorming/SKILL.md @@ -7,7 +7,18 @@ description: "Research ideation partner. Generate hypotheses, explore interdisci ## Overview -Transform into an intelligent scientific thought partner that guides researchers through structured creative ideation. This skill enables deep, conversational brainstorming sessions that help scientists generate novel ideas, make unexpected connections, challenge assumptions, and develop innovative research directions. +Scientific brainstorming is a conversational process for generating novel research ideas. Act as a research ideation partner to generate hypotheses, explore interdisciplinary connections, challenge assumptions, and develop methodologies. Apply this skill for creative scientific problem-solving. 
+ +## When to Use This Skill + +This skill should be used when: +- Generating novel research ideas or directions +- Exploring interdisciplinary connections and analogies +- Challenging assumptions in existing research frameworks +- Developing new methodological approaches +- Identifying research gaps or opportunities +- Overcoming creative blocks in problem-solving +- Brainstorming experimental designs or study plans ## Core Principles diff --git a/scientific-thinking/scientific-critical-thinking/SKILL.md b/scientific-thinking/scientific-critical-thinking/SKILL.md index 3b26564..180617d 100644 --- a/scientific-thinking/scientific-critical-thinking/SKILL.md +++ b/scientific-thinking/scientific-critical-thinking/SKILL.md @@ -7,7 +7,18 @@ description: "Evaluate research rigor. Assess methodology, experimental design, ## Overview -Apply systematic, rigorous critical thinking to scientific work using established methodological principles, evidence evaluation frameworks, and logical reasoning. Analyze research methodology, identify biases and fallacies, evaluate statistical claims, assess evidence quality, and provide constructive critique grounded in scientific principles. +Critical thinking is a systematic process for evaluating scientific rigor. Assess methodology, experimental design, statistical validity, biases, confounding, and evidence quality using the GRADE and Cochrane Risk of Bias (RoB) frameworks. Apply this skill for critical analysis of scientific claims.
+ +## When to Use This Skill + +This skill should be used when: +- Evaluating research methodology and experimental design +- Assessing statistical validity and evidence quality +- Identifying biases and confounding in studies +- Reviewing scientific claims and conclusions +- Conducting systematic reviews or meta-analyses +- Applying GRADE or Cochrane risk of bias assessments +- Providing critical analysis of research papers ## Core Capabilities diff --git a/scientific-thinking/scientific-visualization/SKILL.md b/scientific-thinking/scientific-visualization/SKILL.md index 40dcbf6..d6140fe 100644 --- a/scientific-thinking/scientific-visualization/SKILL.md +++ b/scientific-thinking/scientific-visualization/SKILL.md @@ -7,17 +7,11 @@ description: "Create publication figures with matplotlib/seaborn/plotly. Multi-p ## Overview -This skill provides comprehensive guidance, tools, and best practices for creating publication-ready scientific figures. It covers proper figure composition, colorblind-friendly design, journal-specific requirements, and practical implementation using matplotlib, seaborn, and plotly. - -Publication-ready figures must be: -- **Clear**: Immediately understandable with proper labeling -- **Accurate**: Truthful data representation without distortion -- **Accessible**: Interpretable by readers with color vision deficiencies -- **Professional**: Polished appearance meeting journal standards +Scientific visualization transforms data into clear, accurate figures for publication. Create journal-ready plots with multi-panel layouts, error bars, significance markers, and colorblind-safe palettes. Export as PDF/EPS/TIFF using matplotlib, seaborn, and plotly for manuscripts. ## When to Use This Skill -Activate this skill when: +This skill should be used when: - Creating plots or visualizations for scientific manuscripts - Preparing figures for journal submission (Nature, Science, Cell, PLOS, etc.) 
- Ensuring figures are colorblind-friendly and accessible diff --git a/scientific-thinking/scientific-writing/SKILL.md b/scientific-thinking/scientific-writing/SKILL.md index 449a549..7f7351f 100644 --- a/scientific-thinking/scientific-writing/SKILL.md +++ b/scientific-thinking/scientific-writing/SKILL.md @@ -7,7 +7,7 @@ description: "Write scientific manuscripts. IMRAD structure, citations (APA/AMA/ ## Overview -Scientific writing is a specialized form of communication that requires precision, clarity, and adherence to established conventions. This skill provides comprehensive guidance for creating high-quality scientific manuscripts, from initial structure to final submission. Whether drafting a research article, review paper, case report, or thesis, this skill ensures writing meets the rigorous standards of academic and scientific publishing. +Scientific writing is a process for communicating research with precision and clarity. Write manuscripts using IMRAD structure, citations (APA/AMA/Vancouver), figures/tables, and reporting guidelines (CONSORT/STROBE/PRISMA). Apply this skill for research papers and journal submissions. ## When to Use This Skill diff --git a/scientific-thinking/statistical-analysis/SKILL.md b/scientific-thinking/statistical-analysis/SKILL.md index 64f8727..bbf6198 100644 --- a/scientific-thinking/statistical-analysis/SKILL.md +++ b/scientific-thinking/statistical-analysis/SKILL.md @@ -7,7 +7,18 @@ description: "Statistical analysis toolkit. Hypothesis tests (t-test, ANOVA, chi ## Overview -Conduct rigorous, publication-quality statistical analyses with comprehensive assumption checking, effect size calculations, and proper reporting. This skill provides systematic workflows for selecting appropriate statistical tests, validating assumptions, interpreting results, and reporting findings according to academic standards (APA style). +Statistical analysis is a systematic process for testing hypotheses and quantifying relationships. 
Conduct hypothesis tests (t-test, ANOVA, chi-square), regression, correlation, and Bayesian analyses with assumption checks and APA reporting. Apply this skill for academic research. + +## When to Use This Skill + +This skill should be used when: +- Conducting statistical hypothesis tests (t-tests, ANOVA, chi-square) +- Performing regression or correlation analyses +- Running Bayesian statistical analyses +- Checking statistical assumptions and diagnostics +- Calculating effect sizes and conducting power analyses +- Reporting statistical results in APA format +- Analyzing experimental or observational data for research ---