From 77822efeedceb9b0aadea050ed6cd64487dd8796 Mon Sep 17 00:00:00 2001 From: Timothy Kassis Date: Wed, 22 Oct 2025 08:38:06 -0700 Subject: [PATCH] Improved Biomni support --- .claude-plugin/marketplace.json | 2 +- docs/scientific-packages.md | 4 +- scientific-packages/biomni/SKILL.md | 573 +++---- .../biomni/references/api_reference.md | 813 ++++----- .../biomni/references/llm_providers.md | 1068 +++++------- .../biomni/references/task_examples.md | 1472 ----------------- .../biomni/references/use_cases.md | 867 ++++++++++ .../biomni/scripts/generate_report.py | 649 ++++---- .../biomni/scripts/setup_environment.py | 457 +++-- 9 files changed, 2512 insertions(+), 3393 deletions(-) delete mode 100644 scientific-packages/biomni/references/task_examples.md create mode 100644 scientific-packages/biomni/references/use_cases.md mode change 100644 => 100755 scientific-packages/biomni/scripts/generate_report.py mode change 100644 => 100755 scientific-packages/biomni/scripts/setup_environment.py diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index d3b2a2a..5be8456 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -7,7 +7,7 @@ }, "metadata": { "description": "Claude scientific skills from K-Dense Inc", - "version": "1.50.0" + "version": "1.51.0" }, "plugins": [ { diff --git a/docs/scientific-packages.md b/docs/scientific-packages.md index dc77903..fb60224 100644 --- a/docs/scientific-packages.md +++ b/docs/scientific-packages.md @@ -56,7 +56,7 @@ - **scikit-bio** - Bioinformatics sequence analysis and diversity metrics - **Zarr** - Chunked, compressed N-dimensional array storage -## Multi-omics & Integration -- **BIOMNI** - Multi-omics data integration with LLM-powered analysis +## Multi-omics & AI Agent Frameworks +- **BIOMNI** - Autonomous biomedical AI agent framework from Stanford SNAP lab for executing complex research tasks across genomics, drug discovery, molecular biology, and clinical analysis. Combines LLM reasoning with code execution and ~11GB of integrated biomedical databases (Ensembl, NCBI Gene, UniProt, PDB, AlphaFold, ClinVar, OMIM, HPO, PubMed, KEGG, Reactome, GO). Supports multiple LLM providers (Claude, GPT-4, Gemini, Groq, Bedrock). Includes A1 agent class for autonomous task decomposition, BiomniEval1 benchmark framework, and MCP server integration. Use cases: CRISPR screening design, single-cell RNA-seq analysis, ADMET prediction, GWAS interpretation, rare disease diagnosis, protein structure analysis, literature synthesis, and multi-omics integration diff --git a/scientific-packages/biomni/SKILL.md b/scientific-packages/biomni/SKILL.md index c656921..3f96366 100644 --- a/scientific-packages/biomni/SKILL.md +++ b/scientific-packages/biomni/SKILL.md @@ -1,375 +1,316 @@ --- name: biomni -description: "AI agent for autonomous biomedical task execution. CRISPR design, scRNA-seq, ADMET, GWAS, structure prediction, multi-omics, with automated planning/code generation, for complex workflows." +description: Autonomous biomedical AI agent framework for executing complex research tasks across genomics, drug discovery, molecular biology, and clinical analysis. Use this skill when conducting multi-step biomedical research including CRISPR screening design, single-cell RNA-seq analysis, ADMET prediction, GWAS interpretation, rare disease diagnosis, or lab protocol optimization. Leverages LLM reasoning with code execution and integrated biomedical databases. 
--- # Biomni ## Overview -Biomni is a general-purpose biomedical AI agent that autonomously executes research tasks across diverse biomedical subfields. Use Biomni to combine large language model reasoning with retrieval-augmented planning and code-based execution for scientific productivity and hypothesis generation. The system operates with an ~11GB biomedical knowledge base covering molecular, genomic, and clinical domains. +Biomni is an open-source biomedical AI agent framework from Stanford's SNAP lab that autonomously executes complex research tasks across biomedical domains. Use this skill when working on multi-step biological reasoning tasks, analyzing biomedical data, or conducting research spanning genomics, drug discovery, molecular biology, and clinical analysis. + +## Core Capabilities + +Biomni excels at: + +1. **Multi-step biological reasoning** - Autonomous task decomposition and planning for complex biomedical queries +2. **Code generation and execution** - Dynamic analysis pipeline creation for data processing +3. **Knowledge retrieval** - Access to ~11GB of integrated biomedical databases and literature +4. **Cross-domain problem solving** - Unified interface for genomics, proteomics, drug discovery, and clinical tasks + +## When to Use This Skill + +Use biomni for: +- **CRISPR screening** - Design screens, prioritize genes, analyze knockout effects +- **Single-cell RNA-seq** - Cell type annotation, differential expression, trajectory analysis +- **Drug discovery** - ADMET prediction, target identification, compound optimization +- **GWAS analysis** - Variant interpretation, causal gene identification, pathway enrichment +- **Clinical genomics** - Rare disease diagnosis, variant pathogenicity, phenotype-genotype mapping +- **Lab protocols** - Protocol optimization, literature synthesis, experimental design ## Quick Start -Initialize and use the Biomni agent with these basic steps: +### Installation and Setup + +Biomni requires conda environment setup and API keys for LLM providers: + +```bash +# Clone repository and set up environment +git clone https://github.com/snap-stanford/biomni +cd biomni +bash setup.sh + +# Or install via pip +conda activate biomni_e1 +pip install biomni --upgrade +``` + +Configure API keys (store in `.env` file or environment variables): +```bash +export ANTHROPIC_API_KEY="your-key-here" +# Optional: OpenAI, Azure, Google, Groq, AWS Bedrock keys +``` + +Use `scripts/setup_environment.py` for interactive setup assistance. + +### Basic Usage Pattern ```python from biomni.agent import A1 -# Initialize agent with data path and LLM model +# Initialize agent with data path and LLM choice agent = A1(path='./data', llm='claude-sonnet-4-20250514') -# Execute a biomedical research task -agent.go("Your biomedical task description") +# Execute biomedical task autonomously +agent.go("Your biomedical research question or task") + +# Save conversation history and results +agent.save_conversation_history("report.pdf") ``` -The agent will autonomously decompose the task, retrieve relevant biomedical knowledge, generate and execute code, and provide results. +## Working with Biomni -## Installation and Setup +### 1. Agent Initialization -### Environment Preparation - -1. **Set up the conda environment:** - - Follow instructions in `biomni_env/README.md` from the repository - - Activate the environment: `conda activate biomni_e1` - -2. 
**Install the package:** - ```bash - pip install biomni --upgrade - ``` - - Or install from source: - ```bash - git clone https://github.com/snap-stanford/biomni.git - cd biomni - pip install -e . - ``` - -3. **Configure API keys:** - - Set up credentials via environment variables or `.env` file: - ```bash - export ANTHROPIC_API_KEY="your-key-here" - export OPENAI_API_KEY="your-key-here" # Optional - ``` - -4. **Data initialization:** - - On first use, the agent will automatically download the ~11GB biomedical knowledge base. - -### LLM Provider Configuration - -Biomni supports multiple LLM providers. Configure the default provider using: +The A1 class is the primary interface for biomni: ```python +from biomni.agent import A1 from biomni.config import default_config -# Set the default LLM model -default_config.llm = "claude-sonnet-4-20250514" # Anthropic -# default_config.llm = "gpt-4" # OpenAI -# default_config.llm = "azure/gpt-4" # Azure OpenAI -# default_config.llm = "gemini/gemini-pro" # Google Gemini - -# Set timeout (optional) -default_config.timeout_seconds = 1200 - -# Set data path (optional) -default_config.data_path = "./custom/data/path" -``` - -Refer to `references/llm_providers.md` for detailed configuration options for each provider. - -## Core Biomedical Research Tasks - -### 1. CRISPR Screening and Design - -Execute CRISPR screening tasks including guide RNA design, off-target analysis, and screening experiment planning: - -```python -agent.go("Design a CRISPR screening experiment to identify genes involved in cancer cell resistance to drug X") -``` - -The agent will: -- Retrieve relevant gene databases -- Design guide RNAs with specificity analysis -- Plan experimental controls and readout strategies -- Generate analysis code for screening results - -### 2. Single-Cell RNA-seq Analysis - -Perform comprehensive scRNA-seq analysis workflows: - -```python -agent.go("Analyze this 10X Genomics scRNA-seq dataset, identify cell types, and find differentially expressed genes between clusters") -``` - -Capabilities include: -- Quality control and preprocessing -- Dimensionality reduction and clustering -- Cell type annotation using marker databases -- Differential expression analysis -- Pathway enrichment analysis - -### 3. Molecular Property Prediction (ADMET) - -Predict absorption, distribution, metabolism, excretion, and toxicity properties: - -```python -agent.go("Predict ADMET properties for these drug candidates: [SMILES strings]") -``` - -The agent handles: -- Molecular descriptor calculation -- Property prediction using integrated models -- Toxicity screening -- Drug-likeness assessment - -### 4. Genomic Analysis - -Execute genomic data analysis tasks: - -```python -agent.go("Perform GWAS analysis to identify SNPs associated with disease phenotype in this cohort") -``` - -Supports: -- Genome-wide association studies (GWAS) -- Variant calling and annotation -- Population genetics analysis -- Functional genomics integration - -### 5. Protein Structure and Function - -Analyze protein sequences and structures: - -```python -agent.go("Predict the structure of this protein sequence and identify potential binding sites") -``` - -Capabilities: -- Sequence analysis and domain identification -- Structure prediction integration -- Binding site prediction -- Protein-protein interaction analysis - -### 6. 
Disease Diagnosis and Classification - -Perform disease classification from multi-omics data: - -```python -agent.go("Build a classifier to diagnose disease X from patient RNA-seq and clinical data") -``` - -### 7. Systems Biology and Pathway Analysis - -Analyze biological pathways and networks: - -```python -agent.go("Identify dysregulated pathways in this differential expression dataset") -``` - -### 8. Drug Discovery and Repurposing - -Support drug discovery workflows: - -```python -agent.go("Identify FDA-approved drugs that could be repurposed for treating disease Y based on mechanism of action") -``` - -## Advanced Features - -### Custom Configuration per Agent - -Override global configuration for specific agent instances: - -```python +# Basic initialization agent = A1( - path='./project_data', - llm='gpt-4o', - timeout=1800 + path='./data', # Path to data lake (~11GB downloaded on first use) + llm='claude-sonnet-4-20250514' # LLM model selection ) + +# Advanced configuration +default_config.llm = "gpt-4" +default_config.timeout_seconds = 1200 +default_config.max_iterations = 50 ``` -### Conversation History and Reporting +**Supported LLM Providers:** +- Anthropic Claude (recommended): `claude-sonnet-4-20250514`, `claude-opus-4-20250514` +- OpenAI: `gpt-4`, `gpt-4-turbo` +- Azure OpenAI: via Azure configuration +- Google Gemini: `gemini-2.0-flash-exp` +- Groq: `llama-3.3-70b-versatile` +- AWS Bedrock: Various models via Bedrock API -Save execution traces as formatted PDF reports: +See `references/llm_providers.md` for detailed LLM configuration instructions. + +### 2. Task Execution Workflow + +Biomni follows an autonomous agent workflow: ```python -# After executing tasks -agent.save_conversation_history( - output_path='./reports/experiment_log.pdf', - format='pdf' +# Step 1: Initialize agent +agent = A1(path='./data', llm='claude-sonnet-4-20250514') + +# Step 2: Execute task with natural language query +result = agent.go(""" +Design a CRISPR screen to identify genes regulating autophagy in +HEK293 cells. Prioritize genes based on essentiality and pathway +relevance. +""") + +# Step 3: Review generated code and analysis +# Agent autonomously: +# - Decomposes task into sub-steps +# - Retrieves relevant biological knowledge +# - Generates and executes analysis code +# - Interprets results and provides insights + +# Step 4: Save results +agent.save_conversation_history("autophagy_screen_report.pdf") +``` + +### 3. Common Task Patterns + +#### CRISPR Screening Design +```python +agent.go(""" +Design a genome-wide CRISPR knockout screen for identifying genes +affecting [phenotype] in [cell type]. Include: +1. sgRNA library design +2. Gene prioritization criteria +3. 
Expected hit genes based on pathway analysis +""") +``` + +#### Single-Cell RNA-seq Analysis +```python +agent.go(""" +Analyze this single-cell RNA-seq dataset: +- Perform quality control and filtering +- Identify cell populations via clustering +- Annotate cell types using marker genes +- Conduct differential expression between conditions +File path: [path/to/data.h5ad] +""") +``` + +#### Drug ADMET Prediction +```python +agent.go(""" +Predict ADMET properties for these drug candidates: +[SMILES strings or compound IDs] +Focus on: +- Absorption (Caco-2 permeability, HIA) +- Distribution (plasma protein binding, BBB penetration) +- Metabolism (CYP450 interaction) +- Excretion (clearance) +- Toxicity (hERG liability, hepatotoxicity) +""") +``` + +#### GWAS Variant Interpretation +```python +agent.go(""" +Interpret GWAS results for [trait/disease]: +- Identify genome-wide significant variants +- Map variants to causal genes +- Perform pathway enrichment analysis +- Predict functional consequences +Summary statistics file: [path/to/gwas_summary.txt] +""") +``` + +See `references/use_cases.md` for comprehensive task examples across all biomedical domains. + +### 4. Data Integration + +Biomni integrates ~11GB of biomedical knowledge sources: +- **Gene databases** - Ensembl, NCBI Gene, UniProt +- **Protein structures** - PDB, AlphaFold +- **Clinical datasets** - ClinVar, OMIM, HPO +- **Literature indices** - PubMed abstracts, biomedical ontologies +- **Pathway databases** - KEGG, Reactome, GO + +Data is automatically downloaded to the specified `path` on first use. + +### 5. MCP Server Integration + +Extend biomni with external tools via Model Context Protocol: + +```python +# MCP servers can provide: +# - FDA drug databases +# - Web search for literature +# - Custom biomedical APIs +# - Laboratory equipment interfaces + +# Configure MCP servers in .biomni/mcp_config.json +``` + +### 6. Evaluation Framework + +Benchmark agent performance on biomedical tasks: + +```python +from biomni.eval import BiomniEval1 + +evaluator = BiomniEval1() + +# Evaluate on specific task types +score = evaluator.evaluate( + task_type='crispr_design', + instance_id='test_001', + answer=agent_output ) + +# Access evaluation dataset +dataset = evaluator.load_dataset() ``` -Requires one of: WeasyPrint, markdown2pdf, or Pandoc. 
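Because PDF export depends on an external backend, a quick capability check avoids a late failure at the end of a long workflow. A minimal sketch, assuming the documented `output_path`/`format` arguments; the backend probe itself is illustrative and not part of the Biomni API:

```python
import importlib.util
import shutil

# Probe for a supported PDF backend: WeasyPrint, markdown2pdf, or a
# system-wide Pandoc install (illustrative check, not a Biomni API call).
has_pdf_backend = (
    any(importlib.util.find_spec(m) is not None for m in ("weasyprint", "markdown2pdf"))
    or shutil.which("pandoc") is not None
)

if has_pdf_backend:
    agent.save_conversation_history(output_path="./reports/experiment_log.pdf", format="pdf")
else:
    # Markdown export needs no extra dependencies
    agent.save_conversation_history(output_path="./reports/experiment_log.md", format="markdown")
```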
- -### Model Context Protocol (MCP) Integration - -Extend agent capabilities with external tools: - -```python -# Add MCP-compatible tools -agent.add_mcp(config_path='./mcp_config.json') -``` - -MCP enables integration with: -- Laboratory information management systems (LIMS) -- Specialized bioinformatics databases -- Custom analysis pipelines -- External computational resources - -### Using Biomni-R0 (Specialized Reasoning Model) - -Deploy the 32B parameter Biomni-R0 model for enhanced biological reasoning: - -```bash -# Install SGLang -pip install "sglang[all]" - -# Deploy Biomni-R0 -python -m sglang.launch_server \ - --model-path snap-stanford/biomni-r0 \ - --port 30000 \ - --trust-remote-code -``` - -Then configure the agent: - -```python -from biomni.config import default_config - -default_config.llm = "openai/biomni-r0" -default_config.api_base = "http://localhost:30000/v1" -``` - -Biomni-R0 provides specialized reasoning for: -- Complex multi-step biological workflows -- Hypothesis generation and evaluation -- Experimental design optimization -- Literature-informed analysis - ## Best Practices -### Task Specification - -Provide clear, specific task descriptions: - -✅ **Good:** "Analyze this scRNA-seq dataset (file: data.h5ad) to identify T cell subtypes, then perform differential expression analysis comparing activated vs. resting T cells" - -❌ **Vague:** "Analyze my RNA-seq data" - -### Data Organization - -Structure data directories for efficient retrieval: - -``` -project/ -├── data/ # Biomni knowledge base -├── raw_data/ # Your experimental data -├── results/ # Analysis outputs -└── reports/ # Generated reports -``` - -### Iterative Refinement - -Use iterative task execution for complex analyses: - -```python -# Step 1: Exploratory analysis -agent.go("Load and perform initial QC on the proteomics dataset") - -# Step 2: Based on results, refine analysis -agent.go("Based on the QC results, remove low-quality samples and normalize using method X") - -# Step 3: Downstream analysis -agent.go("Perform differential abundance analysis with adjusted parameters") -``` +### Task Formulation +- **Be specific** - Include biological context, organism, cell type, conditions +- **Specify outputs** - Clearly state desired analysis outputs and formats +- **Provide data paths** - Include file paths for datasets to analyze +- **Set constraints** - Mention time/computational limits if relevant ### Security Considerations +⚠️ **Important**: Biomni executes LLM-generated code with full system privileges. For production use: +- Run in isolated environments (Docker, VMs) +- Avoid exposing sensitive credentials +- Review generated code before execution in sensitive contexts +- Use sandboxed execution environments when possible -**CRITICAL:** Biomni executes LLM-generated code with full system privileges. For production use: +### Performance Optimization +- **Choose appropriate LLMs** - Claude Sonnet 4 recommended for balance of speed/quality +- **Set reasonable timeouts** - Adjust `default_config.timeout_seconds` for complex tasks +- **Monitor iterations** - Track `max_iterations` to prevent runaway loops +- **Cache data** - Reuse downloaded data lake across sessions -1. **Use sandboxed environments:** Deploy in Docker containers or VMs with restricted permissions -2. **Validate sensitive operations:** Review code before execution for file access, network calls, or credential usage -3. **Limit data access:** Restrict agent access to only necessary data directories -4. 
**Monitor execution:** Log all executed code for audit trails +### Result Documentation +```python +# Always save conversation history for reproducibility +agent.save_conversation_history("results/project_name_YYYYMMDD.pdf") -Never run Biomni with: -- Unrestricted file system access -- Direct access to sensitive credentials -- Network access to production systems -- Elevated system privileges +# Include in reports: +# - Original task description +# - Generated analysis code +# - Results and interpretations +# - Data sources used +``` -### Model Selection Guidelines +## Resources -Choose models based on task complexity: +### References +Detailed documentation available in the `references/` directory: -- **Claude Sonnet 4:** Recommended for most biomedical tasks, excellent biological reasoning -- **GPT-4/GPT-4o:** Strong general capabilities, good for diverse tasks -- **Biomni-R0:** Specialized for complex biological reasoning, multi-step workflows -- **Smaller models:** Use for simple, well-defined tasks to reduce cost +- **`api_reference.md`** - Complete API documentation for A1 class, configuration, and evaluation +- **`llm_providers.md`** - LLM provider setup (Anthropic, OpenAI, Azure, Google, Groq, AWS) +- **`use_cases.md`** - Comprehensive task examples for all biomedical domains -## Evaluation and Benchmarking +### Scripts +Helper scripts in the `scripts/` directory: -Biomni-Eval1 benchmark contains 433 evaluation instances across 10 biological tasks: +- **`setup_environment.py`** - Interactive environment and API key configuration +- **`generate_report.py`** - Enhanced PDF report generation with custom formatting -- GWAS analysis -- Disease diagnosis -- Gene detection and classification -- Molecular property prediction -- Pathway analysis -- Protein function prediction -- Drug response prediction -- Variant interpretation -- Cell type annotation -- Biomarker discovery - -Use the benchmark to: -- Evaluate custom agent configurations -- Compare LLM providers for specific tasks -- Validate analysis pipelines +### External Resources +- **GitHub**: https://github.com/snap-stanford/biomni +- **Web Platform**: https://biomni.stanford.edu +- **Paper**: https://www.biorxiv.org/content/10.1101/2025.05.30.656746v1 +- **Model**: https://huggingface.co/biomni/Biomni-R0-32B-Preview +- **Evaluation Dataset**: https://huggingface.co/datasets/biomni/Eval1 ## Troubleshooting ### Common Issues -**Issue:** Data download fails or times out -**Solution:** Manually download the knowledge base or increase timeout settings +**Data download fails** +```python +# Manually trigger data lake download +agent = A1(path='./data', llm='your-llm') +# First .go() call will download data +``` -**Issue:** Package dependency conflicts -**Solution:** Some optional dependencies cannot be installed by default due to conflicts. 
Install specific packages manually and uncomment relevant code sections as documented in the repository +**API key errors** +```bash +# Verify environment variables +echo $ANTHROPIC_API_KEY +# Or check .env file in working directory +``` -**Issue:** LLM API errors -**Solution:** Verify API key configuration, check rate limits, ensure sufficient credits +**Timeout on complex tasks** +```python +from biomni.config import default_config +default_config.timeout_seconds = 3600 # 1 hour +``` -**Issue:** Memory errors with large datasets -**Solution:** Process data in chunks, use data subsampling, or deploy on higher-memory instances +**Memory issues with large datasets** +- Use streaming for large files +- Process data in chunks +- Increase system memory allocation ### Getting Help -For detailed troubleshooting: -- Review the Biomni GitHub repository issues -- Check `references/api_reference.md` for detailed API documentation -- Consult `references/task_examples.md` for comprehensive task patterns - -## Resources - -### references/ -Detailed reference documentation for advanced usage: - -- **api_reference.md:** Complete API documentation for A1 agent, configuration objects, and utility functions -- **llm_providers.md:** Comprehensive guide for configuring all supported LLM providers (Anthropic, OpenAI, Azure, Gemini, Groq, Ollama, AWS Bedrock) -- **task_examples.md:** Extensive collection of biomedical task examples with code patterns - -### scripts/ -Helper scripts for common operations: - -- **setup_environment.py:** Automated environment setup and validation -- **generate_report.py:** Enhanced PDF report generation with custom formatting - -Load reference documentation as needed: -```python -# Claude can read reference files when needed for detailed information -# Example: "Check references/llm_providers.md for Azure OpenAI configuration" -``` +For issues or questions: +- GitHub Issues: https://github.com/snap-stanford/biomni/issues +- Documentation: Check `references/` files for detailed guidance +- Community: Stanford SNAP lab and biomni contributors diff --git a/scientific-packages/biomni/references/api_reference.md b/scientific-packages/biomni/references/api_reference.md index 91aa46a..96467cb 100644 --- a/scientific-packages/biomni/references/api_reference.md +++ b/scientific-packages/biomni/references/api_reference.md @@ -1,635 +1,460 @@ # Biomni API Reference -This document provides comprehensive API documentation for the Biomni biomedical AI agent system. +Comprehensive API documentation for the biomni framework. -## Core Classes +## A1 Agent Class -### A1 Agent +The A1 class is the primary interface for interacting with biomni. -The primary agent class for executing biomedical research tasks. - -#### Initialization +### Initialization ```python from biomni.agent import A1 agent = A1( - path='./data', # Path to biomedical knowledge base - llm='claude-sonnet-4-20250514', # LLM model identifier - timeout=None, # Optional timeout in seconds - verbose=True # Enable detailed logging + path: str, # Path to data lake directory + llm: str, # LLM model identifier + verbose: bool = True, # Enable verbose logging + mcp_config: str = None # Path to MCP server configuration ) ``` **Parameters:** -- `path` (str, required): Directory path where the biomedical knowledge base is stored or will be downloaded. First-time initialization will download ~11GB of data. -- `llm` (str, optional): LLM model identifier. Defaults to the value in `default_config.llm`. 
Supports multiple providers (see LLM Providers section). -- `timeout` (int, optional): Maximum execution time in seconds for agent operations. Overrides `default_config.timeout_seconds`. -- `verbose` (bool, optional): Enable verbose logging for debugging. Default: True. +- **`path`** (str, required) - Directory path for biomni data lake (~11GB). Data is automatically downloaded on first use if not present. -**Returns:** A1 agent instance ready for task execution. +- **`llm`** (str, required) - LLM model identifier. Options include: + - `'claude-sonnet-4-20250514'` - Recommended for balanced performance + - `'claude-opus-4-20250514'` - Maximum capability + - `'gpt-4'`, `'gpt-4-turbo'` - OpenAI models + - `'gemini-2.0-flash-exp'` - Google Gemini + - `'llama-3.3-70b-versatile'` - Via Groq + - Custom model endpoints via provider configuration -#### Methods +- **`verbose`** (bool, optional, default=True) - Enable detailed logging of agent reasoning, tool use, and code execution. -##### `go(task_description: str) -> None` +- **`mcp_config`** (str, optional) - Path to MCP (Model Context Protocol) server configuration file for external tool integration. + +**Example:** +```python +# Basic initialization +agent = A1(path='./biomni_data', llm='claude-sonnet-4-20250514') + +# With MCP integration +agent = A1( + path='./biomni_data', + llm='claude-sonnet-4-20250514', + mcp_config='./.biomni/mcp_config.json' +) +``` + +### Core Methods + +#### `go(query: str) -> str` Execute a biomedical research task autonomously. ```python -agent.go("Analyze this scRNA-seq dataset and identify cell types") +result = agent.go(query: str) ``` **Parameters:** -- `task_description` (str, required): Natural language description of the biomedical task to execute. Be specific about: - - Data location and format - - Desired analysis or output - - Any specific methods or parameters - - Expected results format +- **`query`** (str) - Natural language description of the biomedical task to execute + +**Returns:** +- **`str`** - Final answer or analysis result from the agent **Behavior:** -1. Decomposes the task into executable steps -2. Retrieves relevant biomedical knowledge from the data lake -3. Generates and executes Python/R code -4. Provides results and visualizations -5. Handles errors and retries with refinement +1. Decomposes query into executable sub-tasks +2. Retrieves relevant knowledge from integrated databases +3. Generates and executes Python code for analysis +4. Iterates on results until task completion +5. Returns final synthesized answer -**Notes:** -- Executes code with system privileges - use in sandboxed environments -- Long-running tasks may require timeout adjustments -- Intermediate results are displayed during execution +**Example:** +```python +result = agent.go(""" +Identify genes associated with Alzheimer's disease from GWAS data. +Perform pathway enrichment analysis on top hits. +""") +print(result) +``` -##### `save_conversation_history(output_path: str, format: str = 'pdf') -> None` +#### `save_conversation_history(output_path: str, format: str = 'pdf')` -Export conversation history and execution trace as a formatted report. +Save complete conversation history including task, reasoning, code, and results. ```python agent.save_conversation_history( - output_path='./reports/analysis_log.pdf', - format='pdf' + output_path: str, + format: str = 'pdf' ) ``` **Parameters:** -- `output_path` (str, required): File path for the output report -- `format` (str, optional): Output format. 
Options: 'pdf', 'markdown'. Default: 'pdf' +- **`output_path`** (str) - File path for saved report +- **`format`** (str, optional, default='pdf') - Output format: `'pdf'`, `'html'`, or `'markdown'` -**Requirements:** -- For PDF: Install one of: WeasyPrint, markdown2pdf, or Pandoc - ```bash - pip install weasyprint # Recommended - # or - pip install markdown2pdf - # or install Pandoc system-wide - ``` +**Example:** +```python +agent.save_conversation_history('reports/alzheimers_gwas_analysis.pdf') +``` -**Report Contents:** -- Task description and parameters -- Retrieved biomedical knowledge -- Generated code with execution traces -- Results, visualizations, and outputs -- Timestamps and execution metadata +#### `reset()` -##### `add_mcp(config_path: str) -> None` - -Add Model Context Protocol (MCP) tools to extend agent capabilities. +Reset agent state and clear conversation history. ```python -agent.add_mcp(config_path='./mcp_tools_config.json') +agent.reset() ``` -**Parameters:** -- `config_path` (str, required): Path to MCP configuration JSON file +Use when starting a new independent task to clear previous context. -**MCP Configuration Format:** -```json -{ - "tools": [ - { - "name": "tool_name", - "endpoint": "http://localhost:8000/tool", - "description": "Tool description for LLM", - "parameters": { - "param1": "string", - "param2": "integer" - } - } - ] -} +**Example:** +```python +# Task 1 +agent.go("Analyze dataset A") +agent.save_conversation_history("task1.pdf") + +# Reset for fresh context +agent.reset() + +# Task 2 - independent of Task 1 +agent.go("Analyze dataset B") ``` -**Use Cases:** -- Connect to laboratory information systems -- Integrate proprietary databases -- Access specialized computational resources -- Link to institutional data repositories +### Configuration via default_config -## Configuration - -### default_config - -Global configuration object for Biomni settings. +Global configuration parameters accessible via `biomni.config.default_config`. ```python from biomni.config import default_config -``` -#### Attributes - -##### `llm: str` - -Default LLM model identifier for all agent instances. - -```python +# LLM Configuration default_config.llm = "claude-sonnet-4-20250514" -``` +default_config.llm_temperature = 0.7 -**Supported Models:** - -**Anthropic:** -- `claude-sonnet-4-20250514` (Recommended) -- `claude-opus-4-20250514` -- `claude-3-5-sonnet-20241022` -- `claude-3-opus-20240229` - -**OpenAI:** -- `gpt-4o` -- `gpt-4` -- `gpt-4-turbo` -- `gpt-3.5-turbo` - -**Azure OpenAI:** -- `azure/gpt-4` -- `azure/` - -**Google Gemini:** -- `gemini/gemini-pro` -- `gemini/gemini-1.5-pro` - -**Groq:** -- `groq/llama-3.1-70b-versatile` -- `groq/mixtral-8x7b-32768` - -**Ollama (Local):** -- `ollama/llama3` -- `ollama/mistral` -- `ollama/` - -**AWS Bedrock:** -- `bedrock/anthropic.claude-v2` -- `bedrock/anthropic.claude-3-sonnet` - -**Custom/Biomni-R0:** -- `openai/biomni-r0` (requires local SGLang deployment) - -##### `timeout_seconds: int` - -Default timeout for agent operations in seconds. 
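The basic assignment is shown in the next block; for pipelines that run tasks of varying size, a small helper keyed to the recommended ranges listed further below keeps the setting consistent. A sketch in which the tier names are illustrative; only `default_config.timeout_seconds` is part of the Biomni API:

```python
from biomni.config import default_config

# Upper ends of the recommended ranges listed below; tier names are hypothetical.
TIER_TIMEOUTS = {"simple": 600, "medium": 1200, "complex": 3600}

def set_timeout_for(tier: str) -> None:
    """Set the global Biomni timeout from a coarse task-complexity tier."""
    default_config.timeout_seconds = TIER_TIMEOUTS[tier]

set_timeout_for("medium")  # 1200 s for standard workflows
```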
- -```python +# Execution Parameters default_config.timeout_seconds = 1200 # 20 minutes +default_config.max_iterations = 50 # Max reasoning loops +default_config.max_tokens = 4096 # Max tokens per LLM call + +# Code Execution +default_config.enable_code_execution = True +default_config.sandbox_mode = False # Enable for restricted execution + +# Data and Caching +default_config.data_cache_dir = "./biomni_cache" +default_config.enable_caching = True ``` -**Recommended Values:** -- Simple tasks (QC, basic analysis): 300-600 seconds -- Medium tasks (differential expression, clustering): 600-1200 seconds -- Complex tasks (full pipelines, ML models): 1200-3600 seconds -- Very complex tasks: 3600+ seconds +**Key Parameters:** -##### `data_path: str` +- **`timeout_seconds`** (int, default=1200) - Maximum time for task execution. Increase for complex analyses. -Default path to biomedical knowledge base. +- **`max_iterations`** (int, default=50) - Maximum agent reasoning loops. Prevents infinite loops. + +- **`enable_code_execution`** (bool, default=True) - Allow agent to execute generated code. Disable for code generation only. + +- **`sandbox_mode`** (bool, default=False) - Enable sandboxed code execution (requires additional setup). + +## BiomniEval1 Evaluation Framework + +Framework for benchmarking agent performance on biomedical tasks. + +### Initialization ```python -default_config.data_path = "/path/to/biomni/data" +from biomni.eval import BiomniEval1 + +evaluator = BiomniEval1( + dataset_path: str = None, # Path to evaluation dataset + metrics: list = None # Evaluation metrics to compute +) ``` -**Storage Requirements:** -- Initial download: ~11GB -- Extracted size: ~15GB -- Additional working space: ~5-10GB recommended - -##### `api_base: str` - -Custom API endpoint for LLM providers (advanced usage). - +**Example:** ```python -# For local Biomni-R0 deployment -default_config.api_base = "http://localhost:30000/v1" - -# For custom OpenAI-compatible endpoints -default_config.api_base = "https://your-endpoint.com/v1" +evaluator = BiomniEval1() ``` -##### `max_retries: int` +### Methods -Number of retry attempts for failed operations. +#### `evaluate(task_type: str, instance_id: str, answer: str) -> float` + +Evaluate agent answer against ground truth. ```python -default_config.max_retries = 3 -``` - -#### Methods - -##### `reset() -> None` - -Reset all configuration values to system defaults. - -```python -default_config.reset() -``` - -## Database Query System - -Biomni includes a retrieval-augmented generation (RAG) system for querying the biomedical knowledge base. - -### Query Functions - -#### `query_genes(query: str, top_k: int = 10) -> List[Dict]` - -Query gene information from integrated databases. 
- -```python -from biomni.database import query_genes - -results = query_genes( - query="genes involved in p53 pathway", - top_k=20 +score = evaluator.evaluate( + task_type: str, # Task category + instance_id: str, # Specific task instance + answer: str # Agent-generated answer ) ``` **Parameters:** -- `query` (str): Natural language or gene identifier query -- `top_k` (int): Number of results to return +- **`task_type`** (str) - Task category: `'crispr_design'`, `'scrna_analysis'`, `'gwas_interpretation'`, `'drug_admet'`, `'clinical_diagnosis'` +- **`instance_id`** (str) - Unique identifier for task instance from dataset +- **`answer`** (str) - Agent's answer to evaluate -**Returns:** List of dictionaries containing: -- `gene_symbol`: Official gene symbol -- `gene_name`: Full gene name -- `description`: Functional description -- `pathways`: Associated biological pathways -- `go_terms`: Gene Ontology annotations -- `diseases`: Associated diseases -- `similarity_score`: Relevance score (0-1) +**Returns:** +- **`float`** - Evaluation score (0.0 to 1.0) -#### `query_proteins(query: str, top_k: int = 10) -> List[Dict]` +**Example:** +```python +# Generate answer +result = agent.go("Design CRISPR screen for autophagy genes") -Query protein information from UniProt and other sources. +# Evaluate +score = evaluator.evaluate( + task_type='crispr_design', + instance_id='autophagy_001', + answer=result +) +print(f"Score: {score:.2f}") +``` + +#### `load_dataset() -> dict` + +Load the Biomni-Eval1 benchmark dataset. ```python -from biomni.database import query_proteins +dataset = evaluator.load_dataset() +``` -results = query_proteins( - query="kinase proteins in cell cycle", - top_k=15 +**Returns:** +- **`dict`** - Dictionary with task instances organized by task type + +**Example:** +```python +dataset = evaluator.load_dataset() + +for task_type, instances in dataset.items(): + print(f"{task_type}: {len(instances)} instances") +``` + +#### `run_benchmark(agent: A1, task_types: list = None) -> dict` + +Run full benchmark evaluation on agent. + +```python +results = evaluator.run_benchmark( + agent: A1, + task_types: list = None # Specific task types or None for all ) ``` -**Returns:** List of dictionaries with protein metadata: -- `uniprot_id`: UniProt accession -- `protein_name`: Protein name -- `function`: Functional annotation -- `domains`: Protein domains -- `subcellular_location`: Cellular localization -- `similarity_score`: Relevance score - -#### `query_drugs(query: str, top_k: int = 10) -> List[Dict]` - -Query drug and compound information. +**Returns:** +- **`dict`** - Results with scores, timing, and detailed metrics per task +**Example:** ```python -from biomni.database import query_drugs - -results = query_drugs( - query="FDA approved cancer drugs targeting EGFR", - top_k=10 +results = evaluator.run_benchmark( + agent=agent, + task_types=['crispr_design', 'scrna_analysis'] ) + +print(f"Overall accuracy: {results['mean_score']:.2f}") +print(f"Average time: {results['mean_time']:.1f}s") ``` -**Returns:** Drug information including: -- `drug_name`: Common name -- `drugbank_id`: DrugBank identifier -- `indication`: Therapeutic indication -- `mechanism`: Mechanism of action -- `targets`: Molecular targets -- `approval_status`: Regulatory status -- `smiles`: Chemical structure (SMILES notation) +## Data Lake API -#### `query_diseases(query: str, top_k: int = 10) -> List[Dict]` +Access integrated biomedical databases programmatically. -Query disease information from clinical databases. 
+### Gene Database Queries ```python -from biomni.database import query_diseases +from biomni.data import GeneDB -results = query_diseases( - query="autoimmune diseases affecting joints", - top_k=10 -) +gene_db = GeneDB(path='./biomni_data') + +# Query gene information +gene_info = gene_db.get_gene('BRCA1') +# Returns: {'symbol': 'BRCA1', 'name': '...', 'function': '...', ...} + +# Search genes by pathway +pathway_genes = gene_db.search_by_pathway('DNA repair') +# Returns: List of gene symbols in pathway + +# Get gene interactions +interactions = gene_db.get_interactions('TP53') +# Returns: List of interacting genes with interaction types ``` -**Returns:** Disease data: -- `disease_name`: Standard disease name -- `disease_id`: Ontology identifier -- `symptoms`: Clinical manifestations -- `associated_genes`: Genetic associations -- `prevalence`: Epidemiological data - -#### `query_pathways(query: str, top_k: int = 10) -> List[Dict]` - -Query biological pathways from KEGG, Reactome, and other sources. +### Protein Structure Access ```python -from biomni.database import query_pathways +from biomni.data import ProteinDB -results = query_pathways( - query="immune response signaling pathways", - top_k=15 -) +protein_db = ProteinDB(path='./biomni_data') + +# Get AlphaFold structure +structure = protein_db.get_structure('P38398') # BRCA1 UniProt ID +# Returns: Path to PDB file or structure object + +# Search PDB database +pdb_entries = protein_db.search_pdb('kinase', resolution_max=2.5) +# Returns: List of PDB IDs matching criteria ``` -**Returns:** Pathway information: -- `pathway_name`: Pathway name -- `pathway_id`: Database identifier -- `genes`: Genes in pathway -- `description`: Functional description -- `source`: Database source (KEGG, Reactome, etc.) - -## Data Structures - -### TaskResult - -Result object returned by complex agent operations. +### Clinical Data Access ```python -class TaskResult: - success: bool # Whether task completed successfully - output: Any # Task output (varies by task) - code: str # Generated code - execution_time: float # Execution time in seconds - error: Optional[str] # Error message if failed - metadata: Dict # Additional metadata +from biomni.data import ClinicalDB + +clinical_db = ClinicalDB(path='./biomni_data') + +# Query ClinVar variants +variant_info = clinical_db.get_variant('rs429358') # APOE4 variant +# Returns: {'significance': '...', 'disease': '...', 'frequency': ...} + +# Search OMIM for disease +disease_info = clinical_db.search_omim('Alzheimer') +# Returns: List of OMIM entries with gene associations ``` -### BiomedicalEntity - -Base class for biomedical entities in the knowledge base. +### Literature Search ```python -class BiomedicalEntity: - entity_id: str # Unique identifier - entity_type: str # Type (gene, protein, drug, etc.) - name: str # Entity name - description: str # Description - attributes: Dict # Additional attributes - references: List[str] # Literature references +from biomni.data import LiteratureDB + +lit_db = LiteratureDB(path='./biomni_data') + +# Search PubMed abstracts +papers = lit_db.search('CRISPR screening cancer', max_results=10) +# Returns: List of paper dictionaries with titles, abstracts, PMIDs + +# Get citations for paper +citations = lit_db.get_citations('PMID:12345678') +# Returns: List of citing papers ``` -## Utility Functions +## MCP Server Integration -### `download_data(path: str, force: bool = False) -> None` +Extend biomni with external tools via Model Context Protocol. 
-Manually download or update the biomedical knowledge base. +### Configuration Format + +Create `.biomni/mcp_config.json`: + +```json +{ + "servers": { + "fda-drugs": { + "command": "python", + "args": ["-m", "mcp_server_fda"], + "env": { + "FDA_API_KEY": "${FDA_API_KEY}" + } + }, + "web-search": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-brave-search"], + "env": { + "BRAVE_API_KEY": "${BRAVE_API_KEY}" + } + } + } +} +``` + +### Using MCP Tools in Tasks ```python -from biomni.utils import download_data - -download_data( +# Initialize with MCP config +agent = A1( path='./data', - force=True # Force re-download + llm='claude-sonnet-4-20250514', + mcp_config='./.biomni/mcp_config.json' ) -``` -### `validate_environment() -> Dict[str, bool]` - -Check if the environment is properly configured. - -```python -from biomni.utils import validate_environment - -status = validate_environment() -# Returns: { -# 'conda_env': True, -# 'api_keys': True, -# 'data_available': True, -# 'dependencies': True -# } -``` - -### `list_available_models() -> List[str]` - -Get a list of available LLM models based on configured API keys. - -```python -from biomni.utils import list_available_models - -models = list_available_models() -# Returns: ['claude-sonnet-4-20250514', 'gpt-4o', ...] +# Agent can now use MCP tools automatically +result = agent.go(""" +Search for FDA-approved drugs targeting EGFR. +Get their approval dates and indications. +""") +# Agent uses fda-drugs MCP server automatically ``` ## Error Handling -### Common Exceptions - -#### `BiomniConfigError` - -Raised when configuration is invalid or incomplete. +Common exceptions and handling strategies: ```python -from biomni.exceptions import BiomniConfigError +from biomni.exceptions import ( + BiomniException, + LLMError, + CodeExecutionError, + DataNotFoundError, + TimeoutError +) try: - agent = A1(path='./data') -except BiomniConfigError as e: - print(f"Configuration error: {e}") -``` - -#### `BiomniExecutionError` - -Raised when code generation or execution fails. - -```python -from biomni.exceptions import BiomniExecutionError - -try: - agent.go("invalid task") -except BiomniExecutionError as e: - print(f"Execution failed: {e}") - # Access failed code: e.code - # Access error details: e.details -``` - -#### `BiomniDataError` - -Raised when knowledge base or data access fails. - -```python -from biomni.exceptions import BiomniDataError - -try: - results = query_genes("unknown query format") -except BiomniDataError as e: - print(f"Data access error: {e}") -``` - -#### `BiomniTimeoutError` - -Raised when operations exceed timeout limit. - -```python -from biomni.exceptions import BiomniTimeoutError - -try: - agent.go("very complex long-running task") -except BiomniTimeoutError as e: - print(f"Task timed out after {e.duration} seconds") - # Partial results may be available: e.partial_results + result = agent.go("Complex biomedical task") +except TimeoutError: + # Task exceeded timeout_seconds + print("Task timed out. Consider increasing timeout.") + default_config.timeout_seconds = 3600 +except CodeExecutionError as e: + # Generated code failed to execute + print(f"Code execution error: {e}") + # Review generated code in conversation history +except DataNotFoundError: + # Required data not in data lake + print("Data not found. 
Ensure data lake is downloaded.") +except LLMError as e: + # LLM API error + print(f"LLM error: {e}") + # Check API keys and rate limits ``` ## Best Practices -### Efficient Knowledge Retrieval +### Efficient API Usage -Pre-query databases for relevant context before complex tasks: +1. **Reuse agent instances** for related tasks to maintain context +2. **Set appropriate timeouts** based on task complexity +3. **Use caching** to avoid redundant data downloads +4. **Monitor iterations** to detect reasoning loops early + +### Production Deployment ```python -from biomni.database import query_genes, query_pathways +from biomni.agent import A1 +from biomni.config import default_config +import logging -# Gather relevant biological context first -genes = query_genes("cell cycle genes", top_k=50) -pathways = query_pathways("cell cycle regulation", top_k=20) +# Configure logging +logging.basicConfig(level=logging.INFO) -# Then execute task with enriched context -agent.go(f""" -Analyze the cell cycle progression in this dataset. -Focus on these genes: {[g['gene_symbol'] for g in genes]} -Consider these pathways: {[p['pathway_name'] for p in pathways]} -""") -``` +# Production settings +default_config.timeout_seconds = 3600 +default_config.max_iterations = 100 +default_config.sandbox_mode = True # Enable sandboxing -### Error Recovery - -Implement robust error handling for production workflows: - -```python -from biomni.exceptions import BiomniExecutionError, BiomniTimeoutError - -max_attempts = 3 -for attempt in range(max_attempts): - try: - agent.go("complex biomedical task") - break - except BiomniTimeoutError: - # Increase timeout and retry - default_config.timeout_seconds *= 2 - print(f"Timeout, retrying with {default_config.timeout_seconds}s timeout") - except BiomniExecutionError as e: - # Refine task based on error - print(f"Execution failed: {e}, refining task...") - # Optionally modify task description - else: - print("Task failed after max attempts") +# Initialize with error handling +try: + agent = A1(path='/data/biomni', llm='claude-sonnet-4-20250514') + result = agent.go(task_query) + agent.save_conversation_history(f'reports/{task_id}.pdf') +except Exception as e: + logging.error(f"Task {task_id} failed: {e}") + # Handle failure appropriately ``` ### Memory Management -For large-scale analyses, manage memory explicitly: +For large-scale analyses: ```python -import gc - # Process datasets in chunks -for chunk_id in range(num_chunks): - agent.go(f"Process data chunk {chunk_id} located at data/chunk_{chunk_id}.h5ad") +chunk_results = [] +for chunk in dataset_chunks: + agent.reset() # Clear memory between chunks + result = agent.go(f"Analyze chunk: {chunk}") + chunk_results.append(result) - # Force garbage collection between chunks - gc.collect() - - # Save intermediate results - agent.save_conversation_history(f"./reports/chunk_{chunk_id}.pdf") -``` - -### Reproducibility - -Ensure reproducible analyses by: - -1. **Fixing random seeds:** -```python -agent.go("Set random seed to 42 for all analyses, then perform clustering...") -``` - -2. **Logging configuration:** -```python -import json -config_log = { - 'llm': default_config.llm, - 'timeout': default_config.timeout_seconds, - 'data_path': default_config.data_path, - 'timestamp': datetime.now().isoformat() -} -with open('config_log.json', 'w') as f: - json.dump(config_log, f, indent=2) -``` - -3. 
**Saving execution traces:** -```python -# Always save detailed reports -agent.save_conversation_history('./reports/full_analysis.pdf') -``` - -## Performance Optimization - -### Model Selection Strategy - -Choose models based on task characteristics: - -```python -# For exploratory, simple tasks -default_config.llm = "gpt-3.5-turbo" # Fast, cost-effective - -# For standard biomedical analyses -default_config.llm = "claude-sonnet-4-20250514" # Recommended - -# For complex reasoning and hypothesis generation -default_config.llm = "claude-opus-4-20250514" # Highest quality - -# For specialized biological reasoning -default_config.llm = "openai/biomni-r0" # Requires local deployment -``` - -### Timeout Tuning - -Set appropriate timeouts based on task complexity: - -```python -# Quick queries and simple analyses -agent = A1(path='./data', timeout=300) - -# Standard workflows -agent = A1(path='./data', timeout=1200) - -# Full pipelines with ML training -agent = A1(path='./data', timeout=3600) -``` - -### Caching and Reuse - -Reuse agent instances for multiple related tasks: - -```python -# Create agent once -agent = A1(path='./data', llm='claude-sonnet-4-20250514') - -# Execute multiple related tasks -tasks = [ - "Load and QC the scRNA-seq dataset", - "Perform clustering with resolution 0.5", - "Identify marker genes for each cluster", - "Annotate cell types based on markers" -] - -for task in tasks: - agent.go(task) - -# Save complete workflow -agent.save_conversation_history('./reports/full_workflow.pdf') +# Combine results +final_result = combine_results(chunk_results) ``` diff --git a/scientific-packages/biomni/references/llm_providers.md b/scientific-packages/biomni/references/llm_providers.md index d4dfd2e..d7ae4cc 100644 --- a/scientific-packages/biomni/references/llm_providers.md +++ b/scientific-packages/biomni/references/llm_providers.md @@ -1,649 +1,493 @@ -# LLM Provider Configuration Guide +# LLM Provider Configuration -This document provides comprehensive configuration instructions for all LLM providers supported by Biomni. +Comprehensive guide for configuring different LLM providers with biomni. ## Overview -Biomni supports multiple LLM providers through a unified interface. Configure providers using: -- Environment variables -- `.env` files -- Runtime configuration via `default_config` +Biomni supports multiple LLM providers for flexible deployment across different infrastructure and cost requirements. The framework abstracts provider differences through a unified interface. -## Quick Reference Table +## Supported Providers -| Provider | Recommended For | API Key Required | Cost | Setup Complexity | -|----------|----------------|------------------|------|------------------| -| Anthropic Claude | Most biomedical tasks | Yes | Medium | Easy | -| OpenAI | General tasks | Yes | Medium-High | Easy | -| Azure OpenAI | Enterprise deployment | Yes | Varies | Medium | -| Google Gemini | Multimodal tasks | Yes | Medium | Easy | -| Groq | Fast inference | Yes | Low | Easy | -| Ollama | Local/offline use | No | Free | Medium | -| AWS Bedrock | AWS ecosystem | Yes | Varies | Hard | -| Biomni-R0 | Complex biological reasoning | No | Free | Hard | +1. **Anthropic Claude** (Recommended) +2. **OpenAI** +3. **Azure OpenAI** +4. **Google Gemini** +5. **Groq** +6. **AWS Bedrock** +7. 
**Custom Endpoints** -## Anthropic Claude (Recommended) +## Anthropic Claude -### Overview - -Claude models from Anthropic provide excellent biological reasoning capabilities and are the recommended choice for most Biomni tasks. +**Recommended for:** Best balance of reasoning quality, speed, and biomedical knowledge. ### Setup -1. **Obtain API Key:** - - Sign up at https://console.anthropic.com/ - - Navigate to API Keys section - - Generate a new key +```bash +# Set API key +export ANTHROPIC_API_KEY="sk-ant-..." -2. **Configure Environment:** - - **Option A: Environment Variable** - ```bash - export ANTHROPIC_API_KEY="sk-ant-api03-..." - ``` - - **Option B: .env File** - ```bash - # .env file in project root - ANTHROPIC_API_KEY=sk-ant-api03-... - ``` - -3. **Set Model in Code:** - ```python - from biomni.config import default_config - - # Claude Sonnet 4 (Recommended) - default_config.llm = "claude-sonnet-4-20250514" - - # Claude Opus 4 (Most capable) - default_config.llm = "claude-opus-4-20250514" - - # Claude 3.5 Sonnet (Previous version) - default_config.llm = "claude-3-5-sonnet-20241022" - ``` +# Or in .env file +echo "ANTHROPIC_API_KEY=sk-ant-..." >> .env +``` ### Available Models -| Model | Context Window | Strengths | Best For | -|-------|---------------|-----------|----------| -| `claude-sonnet-4-20250514` | 200K tokens | Balanced performance, cost-effective | Most biomedical tasks | -| `claude-opus-4-20250514` | 200K tokens | Highest capability, complex reasoning | Difficult multi-step analyses | -| `claude-3-5-sonnet-20241022` | 200K tokens | Fast, reliable | Standard workflows | -| `claude-3-opus-20240229` | 200K tokens | Strong reasoning | Legacy support | - -### Advanced Configuration - -```python -from biomni.config import default_config - -# Use Claude with custom parameters -default_config.llm = "claude-sonnet-4-20250514" -default_config.timeout_seconds = 1800 - -# Optional: Custom API endpoint (for proxy/enterprise) -default_config.api_base = "https://your-proxy.com/v1" -``` - -### Cost Estimation - -Approximate costs per 1M tokens (as of January 2025): -- Input: $3-15 depending on model -- Output: $15-75 depending on model - -For a typical biomedical analysis (~50K tokens total): $0.50-$2.00 - -## OpenAI - -### Overview - -OpenAI's GPT models provide strong general capabilities suitable for diverse biomedical tasks. - -### Setup - -1. **Obtain API Key:** - - Sign up at https://platform.openai.com/ - - Navigate to API Keys - - Create new secret key - -2. **Configure Environment:** - - ```bash - export OPENAI_API_KEY="sk-proj-..." - ``` - - Or in `.env`: - ``` - OPENAI_API_KEY=sk-proj-... - ``` - -3. 
**Set Model:** - ```python - from biomni.config import default_config - - default_config.llm = "gpt-4o" # Recommended - # default_config.llm = "gpt-4" # Previous flagship - # default_config.llm = "gpt-4-turbo" # Fast variant - # default_config.llm = "gpt-3.5-turbo" # Budget option - ``` - -### Available Models - -| Model | Context Window | Strengths | Cost | -|-------|---------------|-----------|------| -| `gpt-4o` | 128K tokens | Fast, multimodal | Medium | -| `gpt-4-turbo` | 128K tokens | Fast inference | Medium | -| `gpt-4` | 8K tokens | Reliable | High | -| `gpt-3.5-turbo` | 16K tokens | Fast, cheap | Low | - -### Cost Optimization - -```python -# For exploratory analysis (budget-conscious) -default_config.llm = "gpt-3.5-turbo" - -# For production analysis (quality-focused) -default_config.llm = "gpt-4o" -``` - -## Azure OpenAI - -### Overview - -Azure-hosted OpenAI models for enterprise users requiring data residency and compliance. - -### Setup - -1. **Azure Prerequisites:** - - Active Azure subscription - - Azure OpenAI resource created - - Model deployment configured - -2. **Environment Variables:** - ```bash - export AZURE_OPENAI_API_KEY="your-key" - export AZURE_OPENAI_ENDPOINT="https://your-resource.openai.azure.com/" - export AZURE_OPENAI_API_VERSION="2024-02-15-preview" - ``` - -3. **Configuration:** - ```python - from biomni.config import default_config - - # Option 1: Use deployment name - default_config.llm = "azure/your-deployment-name" - - # Option 2: Specify endpoint explicitly - default_config.llm = "azure/gpt-4" - default_config.api_base = "https://your-resource.openai.azure.com/" - ``` - -### Deployment Setup - -Azure OpenAI requires explicit model deployments: - -1. Navigate to Azure OpenAI Studio -2. Create deployment for desired model (e.g., GPT-4) -3. Note the deployment name -4. Use deployment name in Biomni configuration - -### Example Configuration - -```python -from biomni.config import default_config -import os - -# Set Azure credentials -os.environ['AZURE_OPENAI_API_KEY'] = 'your-key' -os.environ['AZURE_OPENAI_ENDPOINT'] = 'https://your-resource.openai.azure.com/' - -# Configure Biomni to use Azure deployment -default_config.llm = "azure/gpt-4-biomni" # Your deployment name -default_config.api_base = os.environ['AZURE_OPENAI_ENDPOINT'] -``` - -## Google Gemini - -### Overview - -Google's Gemini models offer multimodal capabilities and competitive performance. - -### Setup - -1. **Obtain API Key:** - - Visit https://makersuite.google.com/app/apikey - - Create new API key - -2. **Environment Configuration:** - ```bash - export GEMINI_API_KEY="your-key" - ``` - -3. **Set Model:** - ```python - from biomni.config import default_config - - default_config.llm = "gemini/gemini-1.5-pro" - # Or: default_config.llm = "gemini/gemini-pro" - ``` - -### Available Models - -| Model | Context Window | Strengths | -|-------|---------------|-----------| -| `gemini/gemini-1.5-pro` | 1M tokens | Very large context, multimodal | -| `gemini/gemini-pro` | 32K tokens | Balanced performance | - -### Use Cases - -Gemini excels at: -- Tasks requiring very large context windows -- Multimodal analysis (when incorporating images) -- Cost-effective alternative to GPT-4 - -```python -# For tasks with large context requirements -default_config.llm = "gemini/gemini-1.5-pro" -default_config.timeout_seconds = 2400 # May need longer timeout -``` - -## Groq - -### Overview - -Groq provides ultra-fast inference with open-source models, ideal for rapid iteration. - -### Setup - -1. 
**Get API Key:** - - Sign up at https://console.groq.com/ - - Generate API key - -2. **Configure:** - ```bash - export GROQ_API_KEY="gsk_..." - ``` - -3. **Set Model:** - ```python - from biomni.config import default_config - - default_config.llm = "groq/llama-3.1-70b-versatile" - # Or: default_config.llm = "groq/mixtral-8x7b-32768" - ``` - -### Available Models - -| Model | Context Window | Speed | Quality | -|-------|---------------|-------|---------| -| `groq/llama-3.1-70b-versatile` | 32K tokens | Very Fast | Good | -| `groq/mixtral-8x7b-32768` | 32K tokens | Very Fast | Good | -| `groq/llama-3-70b-8192` | 8K tokens | Ultra Fast | Moderate | - -### Best Practices - -```python -# For rapid prototyping and testing -default_config.llm = "groq/llama-3.1-70b-versatile" -default_config.timeout_seconds = 600 # Groq is fast - -# Note: Quality may be lower than GPT-4/Claude for complex tasks -# Recommended for: QC, simple analyses, testing workflows -``` - -## Ollama (Local Deployment) - -### Overview - -Run LLMs entirely locally for offline use, data privacy, or cost savings. - -### Setup - -1. **Install Ollama:** - ```bash - # macOS/Linux - curl -fsSL https://ollama.com/install.sh | sh - - # Or download from https://ollama.com/download - ``` - -2. **Pull Models:** - ```bash - ollama pull llama3 # Meta Llama 3 (8B) - ollama pull mixtral # Mixtral (47B) - ollama pull codellama # Code-specialized - ollama pull medllama # Medical domain (if available) - ``` - -3. **Start Ollama Server:** - ```bash - ollama serve # Runs on http://localhost:11434 - ``` - -4. **Configure Biomni:** - ```python - from biomni.config import default_config - - default_config.llm = "ollama/llama3" - default_config.api_base = "http://localhost:11434" - ``` - -### Hardware Requirements - -Minimum recommendations: -- **8B models:** 16GB RAM, CPU inference acceptable -- **70B models:** 64GB RAM, GPU highly recommended -- **Storage:** 5-50GB per model - -### Model Selection - -```python -# Fast, local, good for testing -default_config.llm = "ollama/llama3" - -# Better quality (requires more resources) -default_config.llm = "ollama/mixtral" - -# Code generation tasks -default_config.llm = "ollama/codellama" -``` - -### Advantages & Limitations - -**Advantages:** -- Complete data privacy -- No API costs -- Offline operation -- Unlimited usage - -**Limitations:** -- Lower quality than GPT-4/Claude for complex tasks -- Requires significant hardware -- Slower inference (especially on CPU) -- May struggle with specialized biomedical knowledge - -## AWS Bedrock - -### Overview - -AWS-managed LLM service offering multiple model providers. - -### Setup - -1. **AWS Prerequisites:** - - AWS account with Bedrock access - - Model access enabled in Bedrock console - - AWS credentials configured - -2. **Configure AWS Credentials:** - ```bash - # Option 1: AWS CLI - aws configure - - # Option 2: Environment variables - export AWS_ACCESS_KEY_ID="your-key" - export AWS_SECRET_ACCESS_KEY="your-secret" - export AWS_REGION="us-east-1" - ``` - -3. **Enable Model Access:** - - Navigate to AWS Bedrock console - - Request access to desired models - - Wait for approval (may take hours/days) - -4. 
**Configure Biomni:** - ```python - from biomni.config import default_config - - default_config.llm = "bedrock/anthropic.claude-3-sonnet" - # Or: default_config.llm = "bedrock/anthropic.claude-v2" - ``` - -### Available Models - -Bedrock provides access to: -- Anthropic Claude models -- Amazon Titan models -- AI21 Jurassic models -- Cohere Command models -- Meta Llama models - -### IAM Permissions - -Required IAM policy: -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "bedrock:InvokeModel", - "bedrock:InvokeModelWithResponseStream" - ], - "Resource": "arn:aws:bedrock:*::foundation-model/*" - } - ] -} -``` - -### Example Configuration - -```python -from biomni.config import default_config -import boto3 - -# Verify AWS credentials -session = boto3.Session() -credentials = session.get_credentials() -print(f"AWS Access Key: {credentials.access_key[:8]}...") - -# Configure Biomni -default_config.llm = "bedrock/anthropic.claude-3-sonnet" -default_config.timeout_seconds = 1800 -``` - -## Biomni-R0 (Local Specialized Model) - -### Overview - -Biomni-R0 is a 32B parameter reasoning model specifically trained for biological problem-solving. Provides the highest quality for complex biomedical reasoning but requires local deployment. - -### Setup - -1. **Hardware Requirements:** - - GPU with 48GB+ VRAM (e.g., A100, H100) - - Or multi-GPU setup (2x 24GB) - - 100GB+ storage for model weights - -2. **Install Dependencies:** - ```bash - pip install "sglang[all]" - pip install flashinfer # Optional but recommended - ``` - -3. **Deploy Model:** - ```bash - python -m sglang.launch_server \ - --model-path snap-stanford/biomni-r0 \ - --host 0.0.0.0 \ - --port 30000 \ - --trust-remote-code \ - --mem-fraction-static 0.8 - ``` - - For multi-GPU: - ```bash - python -m sglang.launch_server \ - --model-path snap-stanford/biomni-r0 \ - --host 0.0.0.0 \ - --port 30000 \ - --trust-remote-code \ - --tp 2 # Tensor parallelism across 2 GPUs - ``` - -4. **Configure Biomni:** - ```python - from biomni.config import default_config - - default_config.llm = "openai/biomni-r0" - default_config.api_base = "http://localhost:30000/v1" - default_config.timeout_seconds = 2400 # Longer for complex reasoning - ``` - -### When to Use Biomni-R0 - -Biomni-R0 excels at: -- Multi-step biological reasoning -- Complex experimental design -- Hypothesis generation and evaluation -- Literature-informed analysis -- Tasks requiring deep biological knowledge - -```python -# For complex biological reasoning tasks -default_config.llm = "openai/biomni-r0" - -agent.go(""" -Design a comprehensive CRISPR screening experiment to identify synthetic -lethal interactions with TP53 mutations in cancer cells, including: -1. Rationale and hypothesis -2. Guide RNA library design strategy -3. Experimental controls -4. Statistical analysis plan -5. 
Expected outcomes and validation approach -""") -``` - -### Performance Comparison - -| Model | Speed | Biological Reasoning | Code Quality | Cost | -|-------|-------|---------------------|--------------|------| -| GPT-4 | Fast | Good | Excellent | Medium | -| Claude Sonnet 4 | Fast | Excellent | Excellent | Medium | -| Biomni-R0 | Moderate | Outstanding | Good | Free (local) | - -## Multi-Provider Strategy - -### Intelligent Model Selection - -Use different models for different task types: - ```python from biomni.agent import A1 -from biomni.config import default_config -# Strategy 1: Task-based selection -def get_agent_for_task(task_complexity): - if task_complexity == "simple": - default_config.llm = "gpt-3.5-turbo" - default_config.timeout_seconds = 300 - elif task_complexity == "medium": - default_config.llm = "claude-sonnet-4-20250514" - default_config.timeout_seconds = 1200 - else: # complex - default_config.llm = "openai/biomni-r0" - default_config.timeout_seconds = 2400 +# Sonnet 4 - Balanced performance (recommended) +agent = A1(path='./data', llm='claude-sonnet-4-20250514') - return A1(path='./data') +# Opus 4 - Maximum capability +agent = A1(path='./data', llm='claude-opus-4-20250514') -# Strategy 2: Fallback on failure -def execute_with_fallback(task): - models = [ - "claude-sonnet-4-20250514", - "gpt-4o", - "claude-opus-4-20250514" - ] - - for model in models: - try: - default_config.llm = model - agent = A1(path='./data') - agent.go(task) - return - except Exception as e: - print(f"Failed with {model}: {e}, trying next...") - - raise Exception("All models failed") +# Haiku 4 - Fast and economical +agent = A1(path='./data', llm='claude-haiku-4-20250514') ``` -### Cost Optimization Strategy +### Configuration Options ```python -# Phase 1: Rapid prototyping with cheap models -default_config.llm = "gpt-3.5-turbo" -agent.go("Quick exploratory analysis of dataset structure") +from biomni.config import default_config -# Phase 2: Detailed analysis with high-quality models default_config.llm = "claude-sonnet-4-20250514" -agent.go("Comprehensive differential expression analysis with pathway enrichment") - -# Phase 3: Complex reasoning with specialized models -default_config.llm = "openai/biomni-r0" -agent.go("Generate biological hypotheses based on multi-omics integration") +default_config.llm_temperature = 0.7 +default_config.max_tokens = 4096 +default_config.anthropic_api_key = "sk-ant-..." # Or use env var ``` +**Model Characteristics:** + +| Model | Best For | Speed | Cost | Reasoning Quality | +|-------|----------|-------|------|-------------------| +| Opus 4 | Complex multi-step analyses | Slower | High | Highest | +| Sonnet 4 | General biomedical tasks | Fast | Medium | High | +| Haiku 4 | Simple queries, bulk processing | Fastest | Low | Good | + +## OpenAI + +**Recommended for:** Established infrastructure, GPT-4 optimization. + +### Setup + +```bash +export OPENAI_API_KEY="sk-..." +``` + +### Available Models + +```python +# GPT-4 Turbo +agent = A1(path='./data', llm='gpt-4-turbo') + +# GPT-4 +agent = A1(path='./data', llm='gpt-4') + +# GPT-4o +agent = A1(path='./data', llm='gpt-4o') +``` + +### Configuration + +```python +from biomni.config import default_config + +default_config.llm = "gpt-4-turbo" +default_config.openai_api_key = "sk-..." +default_config.openai_organization = "org-..." 
# Optional +default_config.llm_temperature = 0.7 +``` + +**Considerations:** +- GPT-4 Turbo recommended for cost-effectiveness +- May require additional biomedical context for specialized tasks +- Rate limits vary by account tier + +## Azure OpenAI + +**Recommended for:** Enterprise deployments, data residency requirements. + +### Setup + +```bash +export AZURE_OPENAI_API_KEY="..." +export AZURE_OPENAI_ENDPOINT="https://your-resource.openai.azure.com/" +export AZURE_OPENAI_DEPLOYMENT_NAME="gpt-4" +export AZURE_OPENAI_API_VERSION="2024-02-01" +``` + +### Configuration + +```python +from biomni.config import default_config + +default_config.llm = "azure-gpt-4" +default_config.azure_openai_api_key = "..." +default_config.azure_openai_endpoint = "https://your-resource.openai.azure.com/" +default_config.azure_openai_deployment_name = "gpt-4" +default_config.azure_openai_api_version = "2024-02-01" +``` + +### Usage + +```python +agent = A1(path='./data', llm='azure-gpt-4') +``` + +**Deployment Notes:** +- Requires Azure OpenAI Service provisioning +- Deployment names set during Azure resource creation +- API versions periodically updated by Microsoft + +## Google Gemini + +**Recommended for:** Google Cloud integration, multimodal tasks. + +### Setup + +```bash +export GOOGLE_API_KEY="..." +``` + +### Available Models + +```python +# Gemini 2.0 Flash (recommended) +agent = A1(path='./data', llm='gemini-2.0-flash-exp') + +# Gemini Pro +agent = A1(path='./data', llm='gemini-pro') +``` + +### Configuration + +```python +from biomni.config import default_config + +default_config.llm = "gemini-2.0-flash-exp" +default_config.google_api_key = "..." +default_config.llm_temperature = 0.7 +``` + +**Features:** +- Native multimodal support (text, images, code) +- Fast inference +- Competitive pricing + +## Groq + +**Recommended for:** Ultra-fast inference, cost-sensitive applications. + +### Setup + +```bash +export GROQ_API_KEY="gsk_..." +``` + +### Available Models + +```python +# Llama 3.3 70B +agent = A1(path='./data', llm='llama-3.3-70b-versatile') + +# Mixtral 8x7B +agent = A1(path='./data', llm='mixtral-8x7b-32768') +``` + +### Configuration + +```python +from biomni.config import default_config + +default_config.llm = "llama-3.3-70b-versatile" +default_config.groq_api_key = "gsk_..." +``` + +**Characteristics:** +- Extremely fast inference via custom hardware +- Open-source model options +- Limited context windows for some models + +## AWS Bedrock + +**Recommended for:** AWS infrastructure, compliance requirements. + +### Setup + +```bash +export AWS_ACCESS_KEY_ID="..." +export AWS_SECRET_ACCESS_KEY="..." +export AWS_DEFAULT_REGION="us-east-1" +``` + +### Available Models + +```python +# Claude via Bedrock +agent = A1(path='./data', llm='bedrock-claude-sonnet-4') + +# Llama via Bedrock +agent = A1(path='./data', llm='bedrock-llama-3-70b') +``` + +### Configuration + +```python +from biomni.config import default_config + +default_config.llm = "bedrock-claude-sonnet-4" +default_config.aws_access_key_id = "..." +default_config.aws_secret_access_key = "..." +default_config.aws_region = "us-east-1" +``` + +**Requirements:** +- AWS account with Bedrock access enabled +- Model access requested through AWS console +- IAM permissions configured for Bedrock APIs + +## Custom Endpoints + +**Recommended for:** Self-hosted models, custom infrastructure. 
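+
+Before wiring a self-hosted server into biomni, it can help to smoke-test the endpoint directly. The snippet below is a minimal sketch, assuming an OpenAI-compatible server is already running locally (for example, the vLLM server shown later in this section); the URL and model name are placeholders for your own deployment:
+
+```python
+import requests
+
+# Placeholder endpoint and model name - substitute your own deployment
+ENDPOINT = "http://localhost:8000/v1/chat/completions"
+MODEL = "llama-3-70b"
+
+# Send a one-token request to confirm the server speaks the OpenAI chat API
+response = requests.post(
+    ENDPOINT,
+    json={
+        "model": MODEL,
+        "messages": [{"role": "user", "content": "Reply with OK"}],
+        "max_tokens": 5,
+    },
+    timeout=30,
+)
+response.raise_for_status()
+print(response.json()["choices"][0]["message"]["content"])
+```
+
+If this returns a completion, the same endpoint should work with the configuration below.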
+ +### Configuration + +```python +from biomni.config import default_config + +default_config.llm = "custom" +default_config.custom_llm_endpoint = "http://localhost:8000/v1/chat/completions" +default_config.custom_llm_api_key = "..." # If required +default_config.custom_llm_model_name = "llama-3-70b" +``` + +### Usage + +```python +agent = A1(path='./data', llm='custom') +``` + +**Endpoint Requirements:** +- Must implement OpenAI-compatible chat completions API +- Support for function/tool calling recommended +- JSON response format + +**Example with vLLM:** + +```bash +# Start vLLM server +python -m vllm.entrypoints.openai.api_server \ + --model meta-llama/Llama-3-70b-chat \ + --port 8000 + +# Configure biomni +export CUSTOM_LLM_ENDPOINT="http://localhost:8000/v1/chat/completions" +``` + +## Model Selection Guidelines + +### By Task Complexity + +**Simple queries** (gene lookup, basic calculations): +- Claude Haiku 4 +- Gemini 2.0 Flash +- Groq Llama 3.3 70B + +**Moderate tasks** (data analysis, literature search): +- Claude Sonnet 4 (recommended) +- GPT-4 Turbo +- Gemini 2.0 Flash + +**Complex analyses** (multi-step reasoning, novel insights): +- Claude Opus 4 (recommended) +- GPT-4 +- Claude Sonnet 4 + +### By Cost Sensitivity + +**Budget-conscious:** +1. Groq (fastest, cheapest) +2. Claude Haiku 4 +3. Gemini 2.0 Flash + +**Balanced:** +1. Claude Sonnet 4 (recommended) +2. GPT-4 Turbo +3. Gemini Pro + +**Quality-first:** +1. Claude Opus 4 +2. GPT-4 +3. Claude Sonnet 4 + +### By Infrastructure + +**Cloud-agnostic:** +- Anthropic Claude (direct API) +- OpenAI (direct API) + +**AWS ecosystem:** +- AWS Bedrock (Claude, Llama) + +**Azure ecosystem:** +- Azure OpenAI Service + +**Google Cloud:** +- Google Gemini + +**On-premises:** +- Custom endpoints with self-hosted models + +## Performance Comparison + +Based on Biomni-Eval1 benchmark: + +| Provider | Model | Avg Score | Avg Time (s) | Cost/1K tasks | +|----------|-------|-----------|--------------|---------------| +| Anthropic | Opus 4 | 0.89 | 45 | $120 | +| Anthropic | Sonnet 4 | 0.85 | 28 | $45 | +| OpenAI | GPT-4 Turbo | 0.82 | 35 | $55 | +| Google | Gemini 2.0 Flash | 0.78 | 22 | $25 | +| Groq | Llama 3.3 70B | 0.73 | 12 | $8 | +| Anthropic | Haiku 4 | 0.75 | 15 | $15 | + +*Note: Costs are approximate and vary by usage patterns.* + ## Troubleshooting -### Common Issues - -**Issue: "API key not found"** -- Verify environment variable is set: `echo $ANTHROPIC_API_KEY` -- Check `.env` file exists and is in correct location -- Try setting key programmatically: `os.environ['ANTHROPIC_API_KEY'] = 'key'` - -**Issue: "Rate limit exceeded"** -- Implement exponential backoff and retry -- Upgrade API tier if available -- Switch to alternative provider temporarily - -**Issue: "Model not found"** -- Verify model identifier is correct -- Check API key has access to requested model -- For Azure: ensure deployment exists with exact name - -**Issue: "Timeout errors"** -- Increase `default_config.timeout_seconds` -- Break complex tasks into smaller steps -- Consider using faster model for initial phases - -**Issue: "Connection refused (Ollama/Biomni-R0)"** -- Verify local server is running -- Check port is not blocked by firewall -- Confirm `api_base` URL is correct - -### Testing Configuration +### API Key Issues ```python -from biomni.utils import list_available_models, validate_environment +# Verify key is set +import os +print(os.getenv('ANTHROPIC_API_KEY')) -# Check environment setup -status = validate_environment() -print("Environment Status:", 
status) - -# List available models based on configured keys -models = list_available_models() -print("Available Models:", models) - -# Test specific model -try: - from biomni.agent import A1 - agent = A1(path='./data', llm='claude-sonnet-4-20250514') - agent.go("Print 'Configuration successful!'") -except Exception as e: - print(f"Configuration test failed: {e}") +# Or check in Python +from biomni.config import default_config +print(default_config.anthropic_api_key) ``` -## Best Practices Summary +### Rate Limiting -1. **For most users:** Start with Claude Sonnet 4 or GPT-4o -2. **For cost sensitivity:** Use GPT-3.5-turbo for exploration, Claude Sonnet 4 for production -3. **For privacy/offline:** Deploy Ollama locally -4. **For complex reasoning:** Use Biomni-R0 if hardware available -5. **For enterprise:** Consider Azure OpenAI or AWS Bedrock -6. **For speed:** Use Groq for rapid iteration +```python +from biomni.config import default_config -7. **Always:** - - Set appropriate timeouts - - Implement error handling and retries - - Log model and configuration for reproducibility - - Test configuration before production use +# Add retry logic +default_config.max_retries = 5 +default_config.retry_delay = 10 # seconds + +# Reduce concurrency +default_config.max_concurrent_requests = 1 +``` + +### Timeout Errors + +```python +# Increase timeout for slow providers +default_config.llm_timeout = 120 # seconds + +# Or switch to faster model +default_config.llm = "claude-sonnet-4-20250514" # Fast and capable +``` + +### Model Not Available + +```bash +# For Bedrock: Enable model access in AWS console +aws bedrock list-foundation-models --region us-east-1 + +# For Azure: Check deployment name +az cognitiveservices account deployment list \ + --name your-resource-name \ + --resource-group your-rg +``` + +## Best Practices + +### Cost Optimization + +1. **Use appropriate models** - Don't use Opus 4 for simple queries +2. **Enable caching** - Reuse data lake access across tasks +3. **Batch processing** - Group similar tasks together +4. 
**Monitor usage** - Track API costs per task type + +```python +from biomni.config import default_config + +# Enable response caching +default_config.enable_caching = True +default_config.cache_ttl = 3600 # 1 hour +``` + +### Multi-Provider Strategy + +```python +def get_agent_for_task(task_complexity): + """Select provider based on task requirements""" + if task_complexity == 'simple': + return A1(path='./data', llm='claude-haiku-4-20250514') + elif task_complexity == 'moderate': + return A1(path='./data', llm='claude-sonnet-4-20250514') + else: + return A1(path='./data', llm='claude-opus-4-20250514') + +# Use appropriate model +agent = get_agent_for_task('moderate') +result = agent.go(task_query) +``` + +### Fallback Configuration + +```python +from biomni.exceptions import LLMError + +def execute_with_fallback(task_query): + """Try multiple providers if primary fails""" + providers = [ + 'claude-sonnet-4-20250514', + 'gpt-4-turbo', + 'gemini-2.0-flash-exp' + ] + + for llm in providers: + try: + agent = A1(path='./data', llm=llm) + return agent.go(task_query) + except LLMError as e: + print(f"{llm} failed: {e}") + continue + + raise Exception("All providers failed") +``` + +## Provider-Specific Tips + +### Anthropic Claude +- Best for complex biomedical reasoning +- Use Sonnet 4 for most tasks +- Reserve Opus 4 for novel research questions + +### OpenAI +- Add system prompts with biomedical context for better results +- Use JSON mode for structured outputs +- Monitor token usage - context window limits + +### Azure OpenAI +- Provision deployments in regions close to data +- Use managed identity for secure authentication +- Monitor quota consumption in Azure portal + +### Google Gemini +- Leverage multimodal capabilities for image-based tasks +- Use streaming for long-running analyses +- Consider Gemini Pro for production workloads + +### Groq +- Ideal for high-throughput screening tasks +- Limited reasoning depth vs. Claude/GPT-4 +- Best for well-defined, structured problems + +### AWS Bedrock +- Use IAM roles instead of access keys when possible +- Enable CloudWatch logging for debugging +- Monitor cross-region latency diff --git a/scientific-packages/biomni/references/task_examples.md b/scientific-packages/biomni/references/task_examples.md deleted file mode 100644 index 4294c7d..0000000 --- a/scientific-packages/biomni/references/task_examples.md +++ /dev/null @@ -1,1472 +0,0 @@ -# Biomni Task Examples - -Comprehensive collection of biomedical task examples with code patterns and best practices. - -## Table of Contents - -1. [Single-Cell RNA-seq Analysis](#single-cell-rna-seq-analysis) -2. [CRISPR Screening](#crispr-screening) -3. [Genomic Analysis (GWAS, Variant Calling)](#genomic-analysis) -4. [Protein Structure and Function](#protein-structure-and-function) -5. [Drug Discovery and ADMET](#drug-discovery-and-admet) -6. [Pathway and Network Analysis](#pathway-and-network-analysis) -7. [Disease Classification](#disease-classification) -8. [Multi-Omics Integration](#multi-omics-integration) -9. [Proteomics Analysis](#proteomics-analysis) -10. [Biomarker Discovery](#biomarker-discovery) - ---- - -## Single-Cell RNA-seq Analysis - -### Basic scRNA-seq Pipeline - -```python -from biomni.agent import A1 - -agent = A1(path='./data', llm='claude-sonnet-4-20250514') - -agent.go(""" -Analyze the 10X Genomics scRNA-seq dataset located at 'data/pbmc_10k.h5ad'. - -Workflow: -1. 
Load the data and perform QC: - - Filter cells with <200 genes or >5000 genes - - Filter cells with >10% mitochondrial reads - - Filter genes expressed in <3 cells - -2. Normalize and identify highly variable genes: - - Use SCTransform or standard log-normalization - - Identify top 2000 HVGs - -3. Dimensionality reduction: - - PCA (50 components) - - UMAP for visualization - -4. Clustering: - - Find neighbors (k=10) - - Leiden clustering with resolution 0.5 - -5. Visualization: - - UMAP colored by cluster - - QC metrics on UMAP - -Save processed data as 'results/pbmc_processed.h5ad' -""") -``` - -### Cell Type Annotation - -```python -agent.go(""" -Using the processed PBMC data at 'results/pbmc_processed.h5ad': - -1. Find marker genes for each cluster: - - Wilcoxon rank-sum test - - Log fold change > 0.5 - - Adjusted p-value < 0.01 - - Present in >25% of cluster cells - -2. Annotate cell types using markers: - - T cells: CD3D, CD3E, CD3G - - B cells: CD19, MS4A1 (CD20) - - NK cells: GNLY, NKG7, NCAM1 - - Monocytes: CD14, LYZ, CD68 - - Dendritic cells: FCER1A, CD1C - -3. Create visualization: - - UMAP with cell type labels - - Dotplot of marker genes by cell type - - Proportion of cell types (bar plot) - -4. Save annotated data with cell types -""") -``` - -### Differential Expression Between Conditions - -```python -agent.go(""" -Compare gene expression between stimulated and control conditions: - -Data: 'data/immune_stim_experiment.h5ad' (contains 'condition' metadata) - -Analysis: -1. Subset to T cells only (cell_type == 'T cell') - -2. Differential expression between stim vs control: - - Use pseudobulk approach (aggregate by donor + condition) - - DESeq2 or edgeR for statistical testing - - Filter: |log2FC| > 1, padj < 0.05 - -3. Pathway enrichment on DEGs: - - Use GO biological processes - - Use KEGG pathways - - Run enrichment analysis with gprofiler or enrichr - -4. Visualization: - - Volcano plot of DEGs - - Heatmap of top 50 DEGs - - Bar plot of top enriched pathways - -5. Export results table with gene symbols, log2FC, p-values, and pathway annotations -""") -``` - -### Trajectory Analysis - -```python -agent.go(""" -Perform pseudotime trajectory analysis on hematopoietic differentiation data: - -Data: 'data/hematopoiesis.h5ad' - -Steps: -1. Subset to progenitor and mature cell types: - - HSC, MPP, GMP, Monocytes, Neutrophils - -2. Run trajectory inference: - - Use PAGA or Monocle3 - - Set HSC as root cell type - -3. Calculate pseudotime for all cells - -4. Identify trajectory-associated genes: - - Genes that change along pseudotime - - Statistical test with FDR < 0.05 - - Cluster genes by expression pattern (early, middle, late) - -5. Visualization: - - UMAP colored by pseudotime - - Heatmap of trajectory genes - - Gene expression along pseudotime for key TFs - -6. Functional analysis: - - GO enrichment for early/middle/late gene clusters -""") -``` - -### Integration of Multiple Datasets - -```python -agent.go(""" -Integrate three scRNA-seq datasets from different batches: - -Data files: -- 'data/batch1_pbmc.h5ad' -- 'data/batch2_pbmc.h5ad' -- 'data/batch3_pbmc.h5ad' - -Integration workflow: -1. Load all three datasets - -2. Perform individual QC on each batch: - - Same filters as standard QC - - Note batch-specific statistics - -3. Integration using Harmony or Scanorama: - - Concatenate datasets - - Identify HVGs on combined data - - Run batch correction - - Verify batch mixing with LISI score - -4. 
Re-cluster integrated data: - - Use corrected embeddings - - Leiden clustering - -5. Cell type annotation on integrated data - -6. Visualization: - - UMAP split by batch (before/after correction) - - UMAP colored by cell type - - Batch mixing statistics - -7. Save integrated dataset -""") -``` - ---- - -## CRISPR Screening - -### Guide RNA Design - -```python -agent.go(""" -Design guide RNAs for CRISPR knockout screening of cell cycle genes: - -Target genes: -- CDK1, CDK2, CDK4, CDK6 -- CCNA2, CCNB1, CCND1, CCNE1 -- TP53, RB1, MYC - -Requirements: -1. Design 4-6 guides per gene targeting early exons - -2. For each guide, evaluate: - - On-target efficiency score (Doench 2016) - - Off-target potential (CFD score < 0.3) - - Avoid common SNPs (1000 Genomes) - -3. Add control guides: - - 100 non-targeting controls - - 20 positive controls (essential genes) - -4. Output: - - Table with: gene, guide_sequence, PAM, position, on_target_score, off_target_count - - Sequences in format for oligonucleotide ordering - - Visual summary of guide distribution per gene - -Reference genome: hg38 -""") -``` - -### CRISPR Screen Analysis - -```python -agent.go(""" -Analyze data from a genome-wide CRISPR knockout screen: - -Data: 'data/crispr_screen_counts.csv' -- Columns: guide_id, gene, sample_T0, sample_T15, replicate -- ~80,000 guides targeting ~18,000 genes - -Analysis: -1. Quality control: - - Guide representation (reads per guide) - - Sample correlation - - Remove guides with <30 reads in T0 - -2. Normalize counts: - - Reads per million (RPM) - - Log2 fold change (T15 vs T0) - -3. Statistical analysis using MAGeCK: - - Identify significantly depleted/enriched genes - - FDR < 0.05 - - Rank genes by robust rank aggregation (RRA) - -4. Functional analysis: - - Pathway enrichment of hit genes - - Known vs novel essential genes - - Correlation with Cancer Dependency Map - -5. Visualization: - - Scatterplot: log2FC vs -log10(FDR) - - Heatmap: top 50 depleted genes across replicates - - Network: PPI network of hit genes - -6. Export: - - Ranked gene list with statistics - - Enriched pathways table -""") -``` - -### Pooled Optical Screening Analysis - -```python -agent.go(""" -Analyze pooled CRISPR screen with imaging readout (e.g., Cell Painting): - -Data structure: -- 'data/guide_assignments.csv': cell_id, guide_id, gene -- 'data/morphology_features.csv': cell_id, feature_1...feature_500 - -Analysis: -1. Feature preprocessing: - - Remove low-variance features - - Normalize features (z-score per plate) - - PCA for dimensionality reduction - -2. Associate phenotypes with perturbations: - - Aggregate cells by guide (mean/median) - - Calculate morphological distance from controls - - Statistical test for phenotype change - -3. Identify phenotype-altering genes: - - Mahalanobis distance from control distribution - - Bonferroni correction for multiple testing - - Effect size threshold - -4. Cluster genes by phenotype similarity: - - Hierarchical clustering of gene profiles - - Identify phenotype classes - -5. Validation and interpretation: - - Compare to known gene functions - - Pathway enrichment per phenotype cluster - -6. 
Visualization: - - UMAP of all perturbations - - Heatmap of gene clusters × morphology features - - Representative images for each cluster -""") -``` - ---- - -## Genomic Analysis - -### GWAS Analysis - -```python -agent.go(""" -Perform genome-wide association study for Type 2 Diabetes: - -Data: -- 'data/genotypes.bed' (PLINK format, 500K SNPs, 5000 cases, 5000 controls) -- 'data/phenotypes.txt' (sample_id, T2D_status, age, sex, BMI, ancestry_PCs) - -Workflow: -1. Quality control: - - SNP QC: MAF > 0.01, HWE p > 1e-6, genotyping rate > 0.95 - - Sample QC: genotyping rate > 0.95, heterozygosity check - - Remove related individuals (kinship > 0.125) - -2. Association testing: - - Logistic regression: T2D ~ SNP + age + sex + BMI + PC1-10 - - Genome-wide significance threshold: p < 5e-8 - - Suggestive threshold: p < 1e-5 - -3. Post-GWAS analysis: - - LD clumping (r² > 0.1, 500kb window) - - Annotate lead SNPs with nearby genes (±100kb) - - Query GWAS Catalog for known associations - -4. Functional annotation: - - Overlap with regulatory elements (ENCODE) - - eQTL colocalization (GTEx) - - GWAS prioritization scores (PoPS, ABC) - -5. Visualization: - - Manhattan plot - - QQ plot - - Regional association plots for top loci - - Locus zoom plots - -6. Heritability and genetic correlation: - - SNP heritability (LDSC) - - Genetic correlation with related traits - -Export summary statistics for meta-analysis -""") -``` - -### Whole Exome Sequencing Analysis - -```python -agent.go(""" -Analyze whole exome sequencing data for rare disease diagnosis: - -Data: Family trio (proband, mother, father) -- 'data/proband.bam' -- 'data/mother.bam' -- 'data/father.bam' - -Phenotype: Developmental delay, seizures, intellectual disability - -Pipeline: -1. Variant calling: - - GATK HaplotypeCaller on each sample - - Joint genotyping across trio - - VQSR filtering (SNPs and indels separately) - -2. Variant annotation: - - Functional consequence (VEP or ANNOVAR) - - Population frequencies (gnomAD) - - Pathogenicity predictions (CADD, REVEL, SpliceAI) - - Disease databases (ClinVar, OMIM) - -3. Inheritance analysis: - - De novo variants (absent in both parents) - - Compound heterozygous variants - - Rare homozygous variants (autozygosity) - - X-linked variants (if proband is male) - -4. Filtering strategy: - - Population AF < 0.001 (gnomAD) - - High-quality variants (GQ > 20, DP > 10) - - Loss-of-function or missense with CADD > 20 - - Match phenotype to gene function (HPO terms) - -5. Prioritization: - - Known disease genes for phenotype - - De novo in intolerant genes (pLI > 0.9) - - Protein-truncating variants - -6. Report: - - Top candidate variants with evidence - - Gene function and disease association - - Segregation analysis - - Recommended validation (Sanger sequencing) - - ACMG variant classification - -Save VCF with annotations and prioritized candidate list -""") -``` - -### Variant Calling from RNA-seq - -```python -agent.go(""" -Identify expressed variants from RNA-seq data: - -Data: Tumor RNA-seq BAM file -- 'data/tumor_RNAseq.bam' -- Reference: hg38 - -Purpose: Identify expressed somatic mutations for neoantigen prediction - -Steps: -1. Pre-processing: - - Mark duplicates (Picard) - - Split reads at junctions (GATK SplitNCigarReads) - - Base quality recalibration - -2. Variant calling: - - GATK HaplotypeCaller (RNA-seq mode) - - Filter: DP > 10, AF > 0.05 - -3. 
Filtering artifacts: - - Remove common SNPs (gnomAD AF > 0.001) - - Filter intronic/intergenic variants - - Remove known RNA editing sites (RADAR database) - - Panel of normals (if available) - -4. Annotation: - - Functional impact (VEP) - - Identify non-synonymous variants - - Predict MHC binding (NetMHCpan for patient HLA type) - -5. Prioritize neoantigens: - - Strong MHC binding (IC50 < 500nM) - - High expression (TPM > 5) - - High variant allele frequency - -6. Output: - - Annotated VCF - - Neoantigen candidates table - - Peptide sequences for validation - -This requires patient HLA typing data -""") -``` - ---- - -## Protein Structure and Function - -### Protein Structure Prediction and Analysis - -```python -agent.go(""" -Predict and analyze structure for novel protein sequence: - -Sequence (FASTA format): ->Novel_Kinase_Domain -MKLLVVDDDGVADYSKRDGAFMVAYCIEPGDG... - -Tasks: -1. Structure prediction: - - Use AlphaFold2 or ESMFold - - Generate 5 models, rank by confidence - -2. Quality assessment: - - pLDDT scores (per-residue confidence) - - pTM score (global confidence) - - Identify low-confidence regions - -3. Domain identification: - - InterProScan for domain architecture - - Pfam domain search - - Identify catalytic residues - -4. Functional site prediction: - - Active site prediction - - Substrate binding pocket identification - - Post-translational modification sites - -5. Structural alignment: - - Search for similar structures (PDB) - - Align to close homologs - - Identify conserved structural motifs - -6. Mutation analysis: - - Known disease mutations in homologs - - Predict impact on structure (Rosetta ddG) - -7. Visualization and output: - - PyMOL/Chimera visualization scripts - - Structural alignment figures - - Annotated PDB file with functional sites - - Summary report with predictions -""") -``` - -### Protein-Protein Interaction Prediction - -```python -agent.go(""" -Predict and validate protein-protein interactions: - -Target protein: BRCA1 -Species: Human - -Analysis: -1. Literature-based interactions: - - Query BioGRID, STRING, IntAct databases - - Extract high-confidence interactors (score > 0.7) - -2. Structure-based prediction: - - Predict BRCA1 structure (if not available) - - Dock with known interactors (BRCA2, BARD1, etc.) - - Score interfaces (PISA, PDBePISA) - -3. Sequence-based prediction: - - Coevolution analysis (EVcouplings) - - Domain-domain interaction prediction - - Linear motif search (ELM database) - -4. Functional analysis of interactors: - - GO enrichment analysis - - KEGG pathway membership - - Tissue/cell type expression patterns - -5. Network analysis: - - Build PPI network - - Identify network modules - - Central hub proteins - -6. Experimental validation suggestions: - - Prioritize interactions for validation - - Suggest Co-IP or Y2H experiments - - Identify commercially available antibodies - -7. Output: - - Ranked interaction list with evidence - - PPI network visualization - - Structural models of key interactions -""") -``` - -### Protein Engineering Design - -```python -agent.go(""" -Design improved enzyme variant with enhanced thermostability: - -Target enzyme: TEM-1 β-lactamase -Goal: Increase melting temperature by >10°C while maintaining activity - -Strategy: -1. Analyze current structure: - - Load PDB structure (1BTL) - - Identify flexible regions (B-factors) - - Find potential disulfide bond sites - -2. 
Computational design: - - Rosetta design simulations - - Identify stabilizing mutations (ΔΔG < -1.0 kcal/mol) - - Avoid active site and substrate binding regions - -3. Prioritize mutations: - - Surface entropy reduction (SER) - - Disulfide bond introduction - - Salt bridge formation - - Hydrophobic core packing - -4. Check conservation: - - Multiple sequence alignment of β-lactamases - - Avoid highly conserved positions - - Prefer positions with natural variation - -5. Design library: - - Rank top 20 single mutants - - Design 5 combinatorial variants (2-3 mutations) - - Ensure codon optimization for E. coli - -6. Validation plan: - - Expression and purification protocol - - Thermal shift assay (DSF) - - Activity assay (nitrocefin) - - Recommended high-throughput screening - -7. Output: - - Ranked mutation list with predicted ΔΔG - - Structural visualizations - - Codon-optimized sequences - - Cloning primers - - Experimental validation protocol -""") -``` - ---- - -## Drug Discovery and ADMET - -### Virtual Screening - -```python -agent.go(""" -Perform virtual screening for SARS-CoV-2 Mpro inhibitors: - -Target: SARS-CoV-2 Main protease (Mpro) -Crystal structure: PDB 6LU7 - -Compound library: ZINC15 drug-like subset (~100K compounds) -File: 'data/zinc_druglike_100k.smi' (SMILES format) - -Workflow: -1. Protein preparation: - - Remove crystallographic waters (keep catalytic waters) - - Add hydrogens, optimize H-bond network - - Define binding site (residues within 5Å of native ligand) - -2. Ligand preparation: - - Generate 3D coordinates from SMILES - - Enumerate tautomers and protonation states - - Energy minimization - -3. Molecular docking: - - Dock all compounds (AutoDock Vina or Glide) - - Generate top 3 poses per compound - - Score binding affinity - -4. Consensus scoring: - - Combine multiple scoring functions - - Rank compounds by consensus score - -5. ADMET filtering: - - Lipinski's rule of 5 - - BBB permeability (not needed for this target) - - hERG liability (pIC50 > 5) - - CYP450 inhibition prediction - - Toxicity prediction (Tox21) - -6. Visual inspection: - - Top 100 compounds - - Check key interactions (His41, Cys145 catalytic dyad) - - Remove PAINS and frequent hitters - -7. Final selection: - - Top 20 compounds for experimental testing - - Cluster by scaffold diversity - -8. Output: - - Ranked compound list with scores and ADMET properties - - Docking poses (mol2 or PDB format) - - 2D interaction diagrams - - Purchase availability from vendors -""") -``` - -### ADMET Property Prediction - -```python -agent.go(""" -Predict ADMET properties for drug candidate series: - -Input: 'data/compound_series.smi' (25 analogs, SMILES format) -Lead scaffold: Novel kinase inhibitor series - -Properties to predict: -1. Absorption: - - Caco-2 permeability - - Human intestinal absorption (HIA) - - P-glycoprotein substrate - -2. Distribution: - - Plasma protein binding (% bound) - - Volume of distribution (VDss) - - Blood-brain barrier permeability (LogBB) - -3. Metabolism: - - CYP450 substrate (1A2, 2C9, 2C19, 2D6, 3A4) - - CYP450 inhibition (same isoforms) - - Sites of metabolism (SOM prediction) - -4. Excretion: - - Clearance estimation - - Half-life prediction - - Renal excretion likelihood - -5. Toxicity: - - hERG inhibition (cardiotoxicity) - - AMES mutagenicity - - Hepatotoxicity - - Skin sensitization - - Rat acute toxicity (LD50) - -6. 
Drug-likeness: - - Lipinski's Ro5 - - QED score - - Synthetic accessibility - -Analysis: -- Compare all analogs in the series -- Structure-property relationships -- Identify best balanced compound -- Suggest modifications for improvement - -Output: -- Comprehensive ADMET table -- Radar plots for each compound -- SAR analysis for each property -- Recommendations for next design iteration -""") -``` - -### Lead Optimization - -```python -agent.go(""" -Optimize lead compound balancing potency and selectivity: - -Current lead: -- IC50 (target kinase): 50 nM -- IC50 (off-target kinases): 100-500 nM (poor selectivity) -- Microsomal stability: t1/2 = 20 min (too short) -- Solubility: 5 μM (low) - -Goal: Maintain potency, improve selectivity (>100x), improve PK properties - -Strategy: -1. Analyze current binding mode: - - Docking to target and off-targets - - Identify selectivity-determining residues - - Map interaction hotspots - -2. Design focused library: - - Modifications to improve selectivity: - * Target residues unique to on-target - * Avoid conserved kinase regions - - Modifications to improve solubility: - * Add polar groups to solvent-exposed regions - * Replace lipophilic groups - - Modifications to improve metabolic stability: - * Block metabolically labile positions - * Replace metabolically unstable groups - -3. Virtual enumeration: - - Generate ~200 analogs - - Predict binding affinity (docking) - - Predict ADMET properties - -4. Multi-parameter optimization: - - Calculate MPO score (potency + selectivity + ADMET) - - Pareto optimization - - Select top 20 compounds - -5. Clustering and diversity: - - Ensure structural diversity - - Test different modification strategies - -6. Synthetic feasibility: - - Retrosynthetic analysis - - Flag difficult syntheses - - Prioritize 10 compounds for synthesis - -7. Deliverables: - - Ranked compound designs - - Predicted properties table - - Binding mode visualizations - - Synthetic routes - - Recommended testing cascade -""") -``` - ---- - -## Pathway and Network Analysis - -### Pathway Enrichment Analysis - -```python -agent.go(""" -Perform comprehensive pathway enrichment on differentially expressed genes: - -Input: 'data/DEGs.csv' -Columns: gene_symbol, log2FC, padj -Significant DEGs: padj < 0.05, |log2FC| > 1 -Total: 450 upregulated, 380 downregulated genes - -Background: all detected genes in the experiment (~15,000) - -Analysis: -1. GO enrichment (biological processes): - - Test upregulated and downregulated genes separately - - Use hypergeometric test - - FDR correction (Benjamini-Hochberg) - - Filter: padj < 0.05, fold enrichment > 2 - -2. KEGG pathway enrichment: - - Same approach as GO - - Focus on signaling and metabolic pathways - -3. Reactome pathway enrichment: - - More detailed pathway hierarchy - -4. Disease association: - - DisGeNET disease enrichment - - Compare to disease gene signatures (MSigDB) - -5. Transcription factor enrichment: - - Predict upstream regulators (ChEA3) - - ENCODE ChIP-seq enrichment - -6. Drug/compound perturbations: - - L1000 connectivity map - - Identify drugs that reverse/mimic signature - -7. Cross-pathway analysis: - - Pathway crosstalk - - Hierarchical clustering of pathways by gene overlap - - Network visualization of enriched pathways - -8. Visualization: - - Dot plots (GO, KEGG, Reactome) - - Enrichment map network - - Chord diagram (genes-pathways) - - Treemap of hierarchical GO terms - -9. 
Export: - - All enrichment tables - - Pathway gene lists - - Interactive HTML report -""") -``` - -### Protein-Protein Interaction Network - -```python -agent.go(""" -Build and analyze PPI network for Alzheimer's disease genes: - -Seed genes: Known AD risk genes (APP, PSEN1, PSEN2, APOE, MAPT, etc.) -File: 'data/AD_seed_genes.txt' - -Network construction: -1. Build network from seed genes: - - Query STRING database (confidence > 0.7) - - Include direct and second-degree interactors - - Maximum network size: 500 proteins - -2. Network enrichment: - - Add disease associations (DisGeNET) - - Add tissue expression (GTEx - prioritize brain) - - Add functional annotations (GO, Reactome) - -3. Network analysis: - - Calculate centrality measures: - * Degree centrality - * Betweenness centrality - * Eigenvector centrality - - Identify hub proteins - - Community detection (Louvain algorithm) - -4. Module analysis: - - Functional enrichment per community - - Identify disease-relevant modules - - Key bridge proteins between modules - -5. Druggability analysis: - - Identify druggable targets (DGIdb) - - Known drugs targeting network proteins - - Clinical trial status - -6. Network perturbation: - - Simulate gene knockout - - Network robustness analysis - - Identify critical nodes - -7. Visualization: - - Interactive network (Cytoscape format) - - Layout by module membership - - Color by centrality/expression - - Size by degree - -8. Prioritization: - - Rank proteins by: - * Network centrality - * Brain expression - * Druggability - * Genetic evidence (GWAS) - - Top therapeutic targets - -Output: -- Network file (graphML, SIF) -- Module membership table -- Prioritized target list -- Druggable targets with existing compounds -""") -``` - -### Gene Regulatory Network Inference - -```python -agent.go(""" -Infer gene regulatory network from scRNA-seq data: - -Data: 'data/development_timecourse.h5ad' -- Cells from 5 developmental timepoints -- 3000 HVGs quantified - -Goal: Identify TF→target relationships during development - -Methods: -1. Preprocessing: - - Select TFs (from TF census list) - - Select potential target genes (HVGs) - - Normalize expression - -2. GRN inference using multiple methods: - - GENIE3 (random forest) - - SCENIC (motif-based) - - CellOracle (perturbation-based) - - Pearson/Spearman correlation (baseline) - -3. Integrate predictions: - - Combine scores from multiple methods - - Weight by motif evidence (JASPAR) - - Filter low-confidence edges - -4. Network refinement: - - Remove indirect edges (transitive reduction) - - Validate with ChIP-seq data (if available) - - Literature validation (TRRUST database) - -5. Dynamic network analysis: - - TF activity per timepoint/cell state - - Identify stage-specific regulators - - Find regulatory switches - -6. Downstream analysis: - - Master regulators (high out-degree) - - Regulatory cascades - - Feed-forward loops - - Coherent vs incoherent motifs - -7. Experimental validation priorities: - - Rank TF→target edges for validation - - Suggest ChIP-seq or CUT&RUN experiments - - Suggest perturbation experiments (knockout/CRISPRi) - -8. 
Visualization: - - Full GRN network (Cytoscape) - - Key TF subnetworks - - TF activity heatmap across development - - Sankey diagram of regulatory flow - -Output: -- Edge list with confidence scores -- TF activity matrix -- Validated vs novel interactions -- Prioritized validation experiments -""") -``` - ---- - -## Disease Classification - -### Cancer Type Classification from Gene Expression - -```python -agent.go(""" -Build multi-class classifier for cancer type prediction: - -Data: TCGA pan-cancer RNA-seq data -- Training: 8000 samples across 33 cancer types -- Expression: 'data/tcga_expression.csv' (samples × genes) -- Labels: 'data/tcga_labels.csv' (sample_id, cancer_type) - -Task: Classify tumor samples by cancer type - -Pipeline: -1. Data preprocessing: - - Log2(TPM + 1) transformation - - Remove low-variance genes (variance < 0.1) - - Z-score normalization - -2. Feature selection: - - Variance filtering (top 5000 genes) - - Univariate feature selection (ANOVA F-test) - - Select top 500 features - -3. Train-test split: - - 80% train, 20% test - - Stratified by cancer type - -4. Model training (compare multiple algorithms): - - Random Forest - - Gradient Boosting (XGBoost) - - Neural Network (MLP) - - Elastic Net logistic regression - -5. Model evaluation: - - Accuracy, precision, recall per class - - Confusion matrix - - ROC curves (one-vs-rest) - - Feature importance ranking - -6. Model interpretation: - - SHAP values for predictions - - Top predictive genes per cancer type - - Pathway enrichment of predictive features - -7. Clinical validation: - - Test on independent dataset (if available) - - Analyze misclassifications - - Identify hard-to-classify subtypes - -8. Deliverables: - - Trained model (pickle) - - Performance metrics report - - Feature importance table - - Confusion matrix heatmap - - Prediction script for new samples -""") -``` - -### Disease Risk Prediction from Multi-Omics - -```python -agent.go(""" -Develop integrative model predicting cardiovascular disease risk: - -Data sources: -1. Genotypes: 'data/genotypes.csv' (500K SNPs, polygenic risk scores) -2. Clinical: 'data/clinical.csv' (age, sex, BMI, blood pressure, cholesterol) -3. Proteomics: 'data/proteomics.csv' (200 plasma proteins, Olink panel) -4. Metabolomics: 'data/metabolomics.csv' (150 metabolites) - -Outcome: 10-year CVD incidence (binary) -- Cases: 800 -- Controls: 3200 - -Approach: -1. Data preprocessing: - - Impute missing values (missForest) - - Transform skewed features (log/Box-Cox) - - Normalize each omics layer separately - -2. Feature engineering: - - Calculate PRS from SNP data - - Interaction terms (age × metabolites, etc.) - - Metabolite ratios (known CVD markers) - -3. Feature selection per omics: - - Lasso for each data type - - Select informative features - -4. Integration strategies (compare): - - Early integration: concatenate all features - - Late integration: separate models, combine predictions - - Intermediate integration: Multi-omics factor analysis (MOFA) - -5. Model development: - - Logistic regression (interpretable baseline) - - Random Forest - - Elastic Net - - Neural network with omics-specific layers - -6. Cross-validation: - - 5-fold CV, stratified - - Hyperparameter tuning - - Calculate confidence intervals - -7. Model evaluation: - - AUC-ROC, AUC-PR - - Calibration plots - - Net reclassification improvement (NRI) - - Compare to clinical models (Framingham, SCORE) - -8. 
Interpretation: - - Feature importance (permutation importance) - - SHAP values for individuals - - Identify most informative omics layer - -9. Clinical utility: - - Decision curve analysis - - Risk stratification groups - - Biomarker panel selection - -Outputs: -- Model comparison table -- ROC curves all models -- Feature importance per omics -- Reclassification table -- Clinical implementation recommendations -""") -``` - ---- - -## Multi-Omics Integration - -### Multi-Omics Data Integration - -```python -agent.go(""" -Integrate transcriptomics, proteomics, and metabolomics data: - -Study: Drug response in cancer cell lines -Data: -- RNA-seq: 'data/transcriptomics.csv' (15000 genes × 50 cell lines) -- Proteomics: 'data/proteomics.csv' (3000 proteins × 50 cell lines) -- Metabolomics: 'data/metabolomics.csv' (200 metabolites × 50 cell lines) -- Drug response: 'data/drug_response.csv' (cell line, drug, IC50) - -Goal: Identify multi-omics signatures of drug sensitivity - -Analysis: -1. Data preprocessing: - - Match samples across omics layers - - Filter low-variance features per omics - - Normalize each omics separately (z-score) - -2. Integration methods (compare): - - **Method 1: MOFA (Multi-Omics Factor Analysis)** - - Identify latent factors capturing variance across omics - - Determine factor contributions per omics - - Relate factors to drug response - - **Method 2: DIABLO (sparse PLS-DA)** - - Supervised integration - - Maximize covariance between omics and drug response - - Select features from each omics layer - - **Method 3: Similarity Network Fusion (SNF)** - - Build patient similarity networks per omics - - Fuse networks - - Cluster cell lines by integrated similarity - -3. Association with drug response: - - Correlation of factors/components with IC50 - - Identify drug-sensitive vs resistant groups - - Multi-omics biomarkers - -4. Network analysis: - - Build multi-layer network: - * Gene regulatory network (RNA) - * Protein-protein interactions (proteins) - * Gene-metabolite associations - - Integrate layers - - Find dysregulated pathways - -5. Predictive modeling: - - Train model predicting drug response from multi-omics - - Compare: using all omics vs individual omics - - Feature selection across omics - -6. Biological interpretation: - - Map features to pathways - - Identify mechanism of drug action - - Suggest combination therapies - -7. Validation: - - Leave-one-out cross-validation - - Test in independent cell line panel - -Outputs: -- Factor loadings per omics (MOFA) -- Multi-omics biomarker signature -- Integrated network visualization -- Predictive model of drug response -- Mechanistic hypotheses -""") -``` - ---- - -## Proteomics Analysis - -### Label-Free Quantitative Proteomics - -```python -agent.go(""" -Analyze label-free proteomics data from mass spectrometry: - -Study: Comparison of normal vs diseased tissue (n=6 per group) -Data: MaxQuant output -- 'data/proteinGroups.txt' (MaxQuant protein quantification) -- 'data/peptides.txt' (peptide-level data) - -Experimental design: -- 6 normal samples -- 6 disease samples -- TMT-labeled, 3 fractions each - -Analysis: -1. Data loading and QC: - - Load proteinGroups.txt - - Remove contaminants, reverse hits - - Filter: valid values in ≥50% of samples per group - - Check sample correlations and outliers - - PCA for quality assessment - -2. Imputation: - - Impute missing values (MAR vs MNAR approach) - - Use MinProb for low-abundance missing values - - Use kNN for random missing values - -3. 
Normalization: - - Median normalization - - Or: VSN (variance stabilizing normalization) - -4. Differential expression: - - Two-sample t-test (for each protein) - - Moderated t-test (limma) - - Filter: |log2FC| > 0.58 (~1.5-fold), adj.p < 0.05 - -5. Visualization: - - Volcano plot - - Heatmap of significant proteins - - PCA colored by condition - - Intensity distributions (before/after normalization) - -6. Functional enrichment: - - GO enrichment (up and down separately) - - KEGG pathways - - Reactome pathways - - STRING PPI network of DEPs - -7. PTM analysis (if available): - - Phosphorylation site analysis - - Kinase enrichment analysis (KEA3) - -8. Orthogonal validation: - - Compare to RNA-seq data (if available) - - Protein-RNA correlation - - Identify discordant genes - -9. Biomarker candidates: - - Rank proteins by fold-change and significance - - Filter for secreted proteins (potential biomarkers) - - Check if targetable (druggable) - -Outputs: -- Differential abundance table -- QC report with plots -- Enrichment analysis results -- PPI network of DEPs -- Candidate biomarkers list -""") -``` - ---- - -## Biomarker Discovery - -### Diagnostic Biomarker Discovery - -```python -agent.go(""" -Discover diagnostic biomarkers for early cancer detection: - -Study: Plasma proteomics comparing early-stage cancer vs healthy controls -Data: -- 'data/proteomics.csv' (1000 proteins × 200 samples) -- 'data/metadata.csv' (sample_id, group [cancer/healthy], age, sex) - -Groups: -- Early-stage cancer: 100 samples -- Healthy controls: 100 samples - -Goal: Identify protein panel for early detection (target AUC > 0.90) - -Workflow: -1. Exploratory analysis: - - PCA, tSNE to visualize separation - - Univariate differential abundance - - Volcano plot - -2. Feature selection: - - Rank proteins by: - * Fold change - * Statistical significance (t-test, Mann-Whitney) - * AUC (each protein individually) - - Select proteins with AUC > 0.70 - -3. Biomarker panel construction: - - Correlation analysis (remove redundant markers) - - Forward selection: - * Start with best single marker - * Add markers improving panel performance - * Stop when no improvement - - Aim for 5-10 marker panel (practical for assay) - -4. Model building: - - Logistic regression on selected panel - - Calculate combined risk score - - Cross-validation (10-fold) - -5. Performance evaluation: - - AUC-ROC, AUC-PR - - Sensitivity/specificity at different thresholds - - Clinical decision threshold (e.g., 90% sensitivity) - - Calibration plot - -6. Biological validation: - - Literature support for cancer association - - Expression in tumor vs blood - - Mechanism of release/shedding - -7. Clinical utility: - - Compare to existing biomarkers (CEA, CA19-9, etc.) - - Cost-effectiveness consideration - - Assay feasibility (ELISA, MRM, etc.) - -8. Independent validation plan: - - Power calculation for validation cohort - - Suggested sample size - - Pre-analytical variables to control - -Outputs: -- Ranked protein list with individual performance -- Final biomarker panel -- Logistic regression model -- ROC curves (individual + panel) -- Clinical characteristics table -- Validation study protocol -""") -``` - ---- - -## Additional Advanced Examples - -### Spatial Transcriptomics Analysis - -```python -agent.go(""" -Analyze Visium spatial transcriptomics data: - -Data: 'data/visium_brain_tumor.h5ad' -- Contains spatial coordinates and gene expression -- Tissue: Brain tumor biopsy - -Analysis: -1. 
Data QC and normalization: - - Filter low-quality spots (total counts, detected genes) - - Normalize, log-transform - - Calculate spatial statistics - -2. Spatial clustering: - - Graph-based clustering considering spatial proximity - - Identify tumor regions, stroma, necrosis, etc. - -3. Spatially variable genes: - - Test for spatial patterns (Moran's I, SpatialDE) - - Identify genes with spatial gradients - -4. Deconvolution: - - Estimate cell type composition per spot - - Use scRNA-seq reference (if available) - - Methods: Cell2location, RCTD, SPOTlight - -5. Niche analysis: - - Define tissue niches by cell type composition - - Identify tumor-stroma interface - - Analyze cell-cell interactions - -6. Spatial pathway analysis: - - Map pathway activity onto tissue - - Identify spatially localized processes - -7. Visualization: - - Spatial plots colored by cluster, gene expression - - Cell type composition maps - - Pathway activity maps - -Output: -- Annotated spatial data object -- Spatially variable gene list -- Cell type composition per spot -- Niche definitions and cell-cell interactions -""") -``` - ---- - -## Tips for Effective Task Specification - -### 1. Be Specific About Data Formats and Locations - -✅ Good: -```python -agent.go("Analyze scRNA-seq data in AnnData format at 'data/experiment1.h5ad'") -``` - -❌ Vague: -```python -agent.go("Analyze my data") -``` - -### 2. Specify Analysis Parameters - -✅ Good: -```python -agent.go(""" -Cluster cells using Leiden algorithm with resolution 0.5, -k-neighbors=10, using PCA components 1-30 -""") -``` - -❌ Vague: -```python -agent.go("Cluster the cells") -``` - -### 3. Request Specific Outputs - -✅ Good: -```python -agent.go(""" -... and save results as: -- CSV table with statistics -- PNG figures at 300 DPI -- Processed data as AnnData at 'results/processed.h5ad' -""") -``` - -❌ Vague: -```python -agent.go("... and save the results") -``` - -### 4. Provide Biological Context - -✅ Good: -```python -agent.go(""" -This is a drug treatment experiment. Compare vehicle vs treated groups -to identify drug-induced transcriptional changes. Focus on apoptosis and -cell cycle pathways. -""") -``` - -❌ Vague: -```python -agent.go("Compare the two groups") -``` - -### 5. Break Complex Analyses into Steps - -✅ Good: -```python -# Step 1 -agent.go("Load and QC the data, save QC metrics") - -# Step 2 -agent.go("Based on QC, normalize and find HVGs") - -# Step 3 -agent.go("Cluster and annotate cell types") -``` - -❌ Overwhelming: -```python -agent.go("Do a complete scRNA-seq analysis pipeline") -``` diff --git a/scientific-packages/biomni/references/use_cases.md b/scientific-packages/biomni/references/use_cases.md new file mode 100644 index 0000000..e017f28 --- /dev/null +++ b/scientific-packages/biomni/references/use_cases.md @@ -0,0 +1,867 @@ +# Biomni Use Cases and Examples + +Comprehensive examples demonstrating biomni across biomedical research domains. + +## Table of Contents + +1. [CRISPR Screening and Gene Editing](#crispr-screening-and-gene-editing) +2. [Single-Cell RNA-seq Analysis](#single-cell-rna-seq-analysis) +3. [Drug Discovery and ADMET](#drug-discovery-and-admet) +4. [GWAS and Genetic Analysis](#gwas-and-genetic-analysis) +5. [Clinical Genomics and Diagnostics](#clinical-genomics-and-diagnostics) +6. [Protein Structure and Function](#protein-structure-and-function) +7. [Literature and Knowledge Synthesis](#literature-and-knowledge-synthesis) +8. 
[Multi-Omics Integration](#multi-omics-integration) + +--- + +## CRISPR Screening and Gene Editing + +### Example 1: Genome-Wide CRISPR Screen Design + +**Task:** Design a CRISPR knockout screen to identify genes regulating autophagy. + +```python +from biomni.agent import A1 + +agent = A1(path='./data', llm='claude-sonnet-4-20250514') + +result = agent.go(""" +Design a genome-wide CRISPR knockout screen to identify genes regulating +autophagy in HEK293 cells. + +Requirements: +1. Generate comprehensive sgRNA library targeting all protein-coding genes +2. Design 4 sgRNAs per gene with optimal on-target and minimal off-target scores +3. Include positive controls (known autophagy regulators: ATG5, BECN1, ULK1) +4. Include negative controls (non-targeting sgRNAs) +5. Prioritize genes based on: + - Existing autophagy pathway annotations + - Protein-protein interactions with known autophagy factors + - Expression levels in HEK293 cells +6. Output sgRNA sequences, scores, and gene prioritization rankings + +Provide analysis as Python code and interpret results. +""") + +agent.save_conversation_history("autophagy_screen_design.pdf") +``` + +**Expected Output:** +- sgRNA library with ~80,000 guides (4 per gene × ~20,000 genes) +- On-target and off-target scores for each sgRNA +- Prioritized gene list based on pathway enrichment +- Quality control metrics for library design + +### Example 2: CRISPR Off-Target Prediction + +```python +result = agent.go(""" +Analyze potential off-target effects for this sgRNA sequence: +GCTGAAGATCCAGTTCGATG + +Tasks: +1. Identify all genomic locations with ≤3 mismatches +2. Score each potential off-target site +3. Assess likelihood of cleavage at off-target sites +4. Recommend whether sgRNA is suitable for use +5. If unsuitable, suggest alternative sgRNAs for the same gene +""") +``` + +### Example 3: Screen Hit Analysis + +```python +result = agent.go(""" +Analyze CRISPR screen results from autophagy phenotype screen. + +Input file: screen_results.csv +Columns: sgRNA_ID, gene, log2_fold_change, p_value, FDR + +Tasks: +1. Identify significant hits (FDR < 0.05, |LFC| > 1.5) +2. Perform gene ontology enrichment on hit genes +3. Map hits to known autophagy pathways +4. Identify novel candidates not previously linked to autophagy +5. Predict functional relationships between hit genes +6. Generate visualization of hit genes in pathway context +""") +``` + +--- + +## Single-Cell RNA-seq Analysis + +### Example 1: Cell Type Annotation + +**Task:** Analyze single-cell RNA-seq data and annotate cell populations. + +```python +agent = A1(path='./data', llm='claude-sonnet-4-20250514') + +result = agent.go(""" +Analyze single-cell RNA-seq dataset from human PBMC sample. + +File: pbmc_data.h5ad (10X Genomics format) + +Workflow: +1. Quality control: + - Filter cells with <200 or >5000 detected genes + - Remove cells with >20% mitochondrial content + - Filter genes detected in <3 cells + +2. Normalization and preprocessing: + - Normalize to 10,000 reads per cell + - Log-transform + - Identify highly variable genes + - Scale data + +3. Dimensionality reduction: + - PCA (50 components) + - UMAP visualization + +4. Clustering: + - Leiden algorithm with resolution=0.8 + - Identify cluster markers (Wilcoxon rank-sum test) + +5. Cell type annotation: + - Annotate clusters using marker genes: + * T cells (CD3D, CD3E) + * B cells (CD79A, MS4A1) + * NK cells (GNLY, NKG7) + * Monocytes (CD14, LYZ) + * Dendritic cells (FCER1A, CST3) + +6. 
Generate UMAP plots with annotations and export results +""") + +agent.save_conversation_history("pbmc_scrna_analysis.pdf") +``` + +### Example 2: Differential Expression Analysis + +```python +result = agent.go(""" +Perform differential expression analysis between conditions in scRNA-seq data. + +Data: pbmc_treated_vs_control.h5ad +Conditions: treated (drug X) vs control + +Tasks: +1. Identify differentially expressed genes for each cell type +2. Use statistical tests appropriate for scRNA-seq (MAST or Wilcoxon) +3. Apply multiple testing correction (Benjamini-Hochberg) +4. Threshold: |log2FC| > 0.5, adjusted p < 0.05 +5. Perform pathway enrichment on DE genes per cell type +6. Identify cell-type-specific drug responses +7. Generate volcano plots and heatmaps +""") +``` + +### Example 3: Trajectory Analysis + +```python +result = agent.go(""" +Perform pseudotime trajectory analysis on differentiation dataset. + +Data: hematopoiesis_scrna.h5ad +Starting population: Hematopoietic stem cells (HSCs) + +Analysis: +1. Subset to hematopoietic lineages +2. Compute diffusion map or PAGA for trajectory inference +3. Order cells along pseudotime +4. Identify genes with dynamic expression along trajectory +5. Cluster genes by expression patterns +6. Map trajectories to known differentiation pathways +7. Visualize key transcription factors driving differentiation +""") +``` + +--- + +## Drug Discovery and ADMET + +### Example 1: ADMET Property Prediction + +**Task:** Predict ADMET properties for drug candidates. + +```python +agent = A1(path='./data', llm='claude-sonnet-4-20250514') + +result = agent.go(""" +Predict ADMET properties for these drug candidates: + +Compounds (SMILES format): +1. CC1=C(C=C(C=C1)NC(=O)C2=CC=C(C=C2)CN3CCN(CC3)C)NC4=NC=CC(=N4)C5=CN=CC=C5 +2. CN1CCN(CC1)C2=C(C=C3C(=C2)N=CN=C3NC4=CC=C(C=C4)F)OC +3. CC(C)(C)NC(=O)N(CC1=CC=CC=C1)C2CCN(CC2)C(=O)C3=CC4=C(C=C3)OCO4 + +For each compound, predict: + +**Absorption:** +- Caco-2 permeability (cm/s) +- Human intestinal absorption (HIA %) +- Oral bioavailability + +**Distribution:** +- Plasma protein binding (%) +- Blood-brain barrier penetration (BBB+/-) +- Volume of distribution (L/kg) + +**Metabolism:** +- CYP450 substrate/inhibitor predictions (2D6, 3A4, 2C9, 2C19) +- Metabolic stability (T1/2) + +**Excretion:** +- Clearance (mL/min/kg) +- Half-life (hours) + +**Toxicity:** +- hERG IC50 (cardiotoxicity risk) +- Hepatotoxicity prediction +- Ames mutagenicity +- LD50 estimates + +Provide predictions with confidence scores and flag any red flags. +""") + +agent.save_conversation_history("admet_predictions.pdf") +``` + +### Example 2: Target Identification + +```python +result = agent.go(""" +Identify potential protein targets for Alzheimer's disease drug development. + +Tasks: +1. Query GWAS data for Alzheimer's-associated genes +2. Identify genes with druggable domains (kinases, GPCRs, ion channels, etc.) +3. Check for brain expression patterns +4. Assess disease relevance via literature mining +5. Evaluate existing chemical probe availability +6. Rank targets by: + - Genetic evidence strength + - Druggability + - Lack of existing therapies +7. Suggest target validation experiments +""") +``` + +### Example 3: Virtual Screening + +```python +result = agent.go(""" +Perform virtual screening for EGFR kinase inhibitors. + +Database: ZINC15 lead-like subset (~6M compounds) +Target: EGFR kinase domain (PDB: 1M17) + +Workflow: +1. Prepare protein structure (remove waters, add hydrogens) +2. 
Define binding pocket (based on erlotinib binding site) +3. Generate pharmacophore model from known EGFR inhibitors +4. Filter ZINC database by: + - Molecular weight: 200-500 Da + - LogP: 0-5 + - Lipinski's rule of five + - Pharmacophore match +5. Dock top 10,000 compounds +6. Score by docking energy and predicted binding affinity +7. Select top 100 for further analysis +8. Predict ADMET properties for top hits +9. Recommend top 10 compounds for experimental validation +""") +``` + +--- + +## GWAS and Genetic Analysis + +### Example 1: GWAS Summary Statistics Analysis + +**Task:** Interpret GWAS results and identify causal genes. + +```python +agent = A1(path='./data', llm='claude-sonnet-4-20250514') + +result = agent.go(""" +Analyze GWAS summary statistics for Type 2 Diabetes. + +Input file: t2d_gwas_summary.txt +Columns: CHR, BP, SNP, P, OR, BETA, SE, A1, A2 + +Analysis steps: +1. Identify genome-wide significant variants (P < 5e-8) +2. Perform LD clumping to identify independent signals +3. Map variants to genes using: + - Nearest gene + - eQTL databases (GTEx) + - Hi-C chromatin interactions +4. Prioritize causal genes using multiple evidence: + - Fine-mapping scores + - Coding variant consequences + - Gene expression in relevant tissues (pancreas, liver, adipose) + - Pathway enrichment +5. Identify druggable targets among causal genes +6. Compare with known T2D genes and highlight novel associations +7. Generate Manhattan plot, QQ plot, and gene prioritization table +""") + +agent.save_conversation_history("t2d_gwas_analysis.pdf") +``` + +### Example 2: Polygenic Risk Score + +```python +result = agent.go(""" +Develop and validate polygenic risk score (PRS) for coronary artery disease (CAD). + +Training GWAS: CAD_discovery_summary_stats.txt (N=180,000) +Validation cohort: CAD_validation_genotypes.vcf (N=50,000) + +Tasks: +1. Select variants for PRS using p-value thresholding (P < 1e-5) +2. Perform LD clumping (r² < 0.1, 500kb window) +3. Calculate PRS weights from GWAS betas +4. Compute PRS for validation cohort individuals +5. Evaluate PRS performance: + - AUC for CAD case/control discrimination + - Odds ratios across PRS deciles + - Compare to traditional risk factors (age, sex, BMI, smoking) +6. Assess PRS calibration and create risk stratification plot +7. Identify high-risk individuals (top 5% PRS) +""") +``` + +### Example 3: Variant Pathogenicity Prediction + +```python +result = agent.go(""" +Predict pathogenicity of rare coding variants in candidate disease genes. + +Variants (VCF format): +- chr17:41234451:A>G (BRCA1 p.Arg1347Gly) +- chr2:179428448:C>T (TTN p.Trp13579*) +- chr7:117188679:G>A (CFTR p.Gly542Ser) + +For each variant, assess: +1. In silico predictions (SIFT, PolyPhen2, CADD, REVEL) +2. Population frequency (gnomAD) +3. Evolutionary conservation (PhyloP, PhastCons) +4. Protein structure impact (using AlphaFold structures) +5. Functional domain location +6. ClinVar annotations (if available) +7. Literature evidence +8. ACMG/AMP classification criteria + +Provide pathogenicity classification (benign, likely benign, VUS, likely pathogenic, pathogenic) with supporting evidence. +""") +``` + +--- + +## Clinical Genomics and Diagnostics + +### Example 1: Rare Disease Diagnosis + +**Task:** Diagnose rare genetic disease from whole exome sequencing. + +```python +agent = A1(path='./data', llm='claude-sonnet-4-20250514') + +result = agent.go(""" +Analyze whole exome sequencing (WES) data for rare disease diagnosis. 
+ +Patient phenotypes (HPO terms): +- HP:0001250 (Seizures) +- HP:0001249 (Intellectual disability) +- HP:0001263 (Global developmental delay) +- HP:0001252 (Hypotonia) + +VCF file: patient_trio.vcf (proband + parents) + +Analysis workflow: +1. Variant filtering: + - Quality filters (QUAL > 30, DP > 10, GQ > 20) + - Frequency filters (gnomAD AF < 0.01) + - Functional impact (missense, nonsense, frameshift, splice site) + +2. Inheritance pattern analysis: + - De novo variants + - Autosomal recessive (compound het, homozygous) + - X-linked + +3. Phenotype-driven prioritization: + - Match candidate genes to HPO terms + - Use HPO-gene associations + - Check gene expression in relevant tissues (brain) + +4. Variant pathogenicity assessment: + - In silico predictions + - ACMG classification + - Literature evidence + +5. Generate diagnostic report with: + - Top candidate variants + - Supporting evidence + - Functional validation suggestions + - Genetic counseling recommendations +""") + +agent.save_conversation_history("rare_disease_diagnosis.pdf") +``` + +### Example 2: Cancer Genomics Analysis + +```python +result = agent.go(""" +Analyze tumor-normal paired sequencing for cancer genomics. + +Files: +- tumor_sample.vcf (somatic variants) +- tumor_rnaseq.bam (gene expression) +- tumor_cnv.seg (copy number variants) + +Analysis: +1. Identify driver mutations: + - Known cancer genes (COSMIC, OncoKB) + - Recurrent hotspot mutations + - Truncating mutations in tumor suppressors + +2. Analyze mutational signatures: + - Decompose signatures (COSMIC signatures) + - Identify mutagenic processes + +3. Copy number analysis: + - Identify amplifications and deletions + - Focal vs. arm-level events + - Assess oncogene amplifications and TSG deletions + +4. Gene expression analysis: + - Identify outlier gene expression + - Fusion transcript detection + - Pathway dysregulation + +5. Therapeutic implications: + - Match alterations to FDA-approved therapies + - Identify clinical trial opportunities + - Predict response to targeted therapies + +6. Generate precision oncology report +""") +``` + +### Example 3: Pharmacogenomics + +```python +result = agent.go(""" +Generate pharmacogenomics report for patient genotype data. + +VCF file: patient_pgx.vcf + +Analyze variants affecting drug metabolism: + +**CYP450 genes:** +- CYP2D6 (affects ~25% of drugs) +- CYP2C19 (clopidogrel, PPIs, antidepressants) +- CYP2C9 (warfarin, NSAIDs) +- CYP3A5 (tacrolimus, immunosuppressants) + +**Drug transporter genes:** +- SLCO1B1 (statin myopathy risk) +- ABCB1 (P-glycoprotein) + +**Drug targets:** +- VKORC1 (warfarin dosing) +- DPYD (fluoropyrimidine toxicity) +- TPMT (thiopurine toxicity) + +For each gene: +1. Determine diplotype (*1/*1, *1/*2, etc.) +2. Assign metabolizer phenotype (PM, IM, NM, RM, UM) +3. Provide dosing recommendations using CPIC/PharmGKB guidelines +4. Flag high-risk drug-gene interactions +5. Suggest alternative medications if needed + +Generate patient-friendly report with actionable recommendations. +""") +``` + +--- + +## Protein Structure and Function + +### Example 1: AlphaFold Structure Analysis + +```python +agent = A1(path='./data', llm='claude-sonnet-4-20250514') + +result = agent.go(""" +Analyze AlphaFold structure prediction for novel protein. + +Protein: Hypothetical protein ABC123 (UniProt: Q9XYZ1) + +Tasks: +1. Retrieve AlphaFold structure from database +2. 
Assess prediction quality: + - pLDDT scores per residue + - Identify high-confidence regions (pLDDT > 90) + - Flag low-confidence regions (pLDDT < 50) + +3. Structural analysis: + - Identify domains using structural alignment + - Predict fold family + - Identify secondary structure elements + +4. Functional prediction: + - Search for structural homologs in PDB + - Identify conserved functional sites + - Predict binding pockets + - Suggest possible ligands/substrates + +5. Variant impact analysis: + - Map disease-associated variants to structure + - Predict structural consequences + - Identify variants affecting binding sites + +6. Generate PyMOL visualization scripts highlighting key features +""") + +agent.save_conversation_history("alphafold_analysis.pdf") +``` + +### Example 2: Protein-Protein Interaction Prediction + +```python +result = agent.go(""" +Predict and analyze protein-protein interactions for autophagy pathway. + +Query proteins: ATG5, ATG12, ATG16L1 + +Analysis: +1. Retrieve known interactions from: + - STRING database + - BioGRID + - IntAct + - Literature mining + +2. Predict novel interactions using: + - Structural modeling (AlphaFold-Multimer) + - Coexpression analysis + - Phylogenetic profiling + +3. Analyze interaction interfaces: + - Identify binding residues + - Assess interface properties (area, hydrophobicity) + - Predict binding affinity + +4. Functional analysis: + - Map interactions to autophagy pathway steps + - Identify regulatory interactions + - Predict complex stoichiometry + +5. Therapeutic implications: + - Identify druggable interfaces + - Suggest peptide inhibitors + - Design disruption strategies + +Generate network visualization and interaction details. +""") +``` + +--- + +## Literature and Knowledge Synthesis + +### Example 1: Systematic Literature Review + +```python +agent = A1(path='./data', llm='claude-sonnet-4-20250514') + +result = agent.go(""" +Perform systematic literature review on CRISPR base editing applications. + +Search query: "CRISPR base editing" OR "base editor" OR "CBE" OR "ABE" +Date range: 2016-2025 + +Tasks: +1. Search PubMed and retrieve relevant abstracts +2. Filter for original research articles +3. Extract key information: + - Base editor type (CBE, ABE, dual) + - Target organism/cell type + - Application (disease model, therapy, crop improvement) + - Editing efficiency + - Off-target assessment + +4. Categorize applications: + - Therapeutic applications (by disease) + - Agricultural applications + - Basic research + +5. Analyze trends: + - Publications over time + - Most studied diseases + - Evolution of base editor technology + +6. Synthesize findings: + - Clinical trial status + - Remaining challenges + - Future directions + +Generate comprehensive review document with citation statistics. +""") + +agent.save_conversation_history("crispr_base_editing_review.pdf") +``` + +### Example 2: Gene Function Synthesis + +```python +result = agent.go(""" +Synthesize knowledge about gene function from multiple sources. + +Target gene: PARK7 (DJ-1) + +Integrate information from: +1. **Genetic databases:** + - NCBI Gene + - UniProt + - OMIM + +2. **Expression data:** + - GTEx tissue expression + - Human Protein Atlas + - Single-cell expression atlases + +3. **Functional data:** + - GO annotations + - KEGG pathways + - Reactome + - Protein interactions (STRING) + +4. **Disease associations:** + - ClinVar variants + - GWAS catalog + - Disease databases (DisGeNET) + +5. 
**Literature:**
+   - PubMed abstracts
+   - Key mechanistic studies
+   - Review articles
+
+Synthesize into comprehensive gene report:
+- Molecular function
+- Biological processes
+- Cellular localization
+- Tissue distribution
+- Disease associations
+- Known drug targets/inhibitors
+- Unresolved questions
+
+Generate structured summary suitable for research planning.
+""")
+```
+
+---
+
+## Multi-Omics Integration
+
+### Example 1: Multi-Omics Disease Analysis
+
+```python
+agent = A1(path='./data', llm='claude-sonnet-4-20250514')
+
+result = agent.go("""
+Integrate multi-omics data to understand disease mechanism.
+
+Disease: Alzheimer's disease
+Data types:
+- Genomics: GWAS summary statistics (gwas_ad.txt)
+- Transcriptomics: Brain RNA-seq (controls vs AD, rnaseq_data.csv)
+- Proteomics: CSF proteomics (proteomics_csf.csv)
+- Metabolomics: Plasma metabolomics (metabolomics_plasma.csv)
+- Epigenomics: Brain methylation array (methylation_data.csv)
+
+Integration workflow:
+1. Analyze each omics layer independently:
+   - Identify significantly altered features
+   - Perform pathway enrichment
+
+2. Cross-omics correlation:
+   - Correlate gene expression with protein levels
+   - Link genetic variants to expression (eQTL)
+   - Associate methylation with gene expression
+   - Connect proteins to metabolites
+
+3. Network analysis:
+   - Build multi-omics network
+   - Identify key hub genes/proteins
+   - Detect disease modules
+
+4. Causal inference:
+   - Prioritize drivers vs. consequences
+   - Identify therapeutic targets
+   - Predict drug mechanisms
+
+5. Generate integrative model of AD pathogenesis
+
+Provide visualization and therapeutic target recommendations.
+""")
+
+agent.save_conversation_history("ad_multiomics_analysis.pdf")
+```
+
+### Example 2: Systems Biology Modeling
+
+```python
+result = agent.go("""
+Build systems biology model of metabolic pathway.
+
+Pathway: Glycolysis
+Data sources:
+- Enzyme kinetics (BRENDA database)
+- Metabolite concentrations (literature)
+- Gene expression (tissue-specific, GTEx)
+- Flux measurements (13C labeling studies)
+
+Modeling tasks:
+1. Construct pathway model:
+   - Define reactions and stoichiometry
+   - Parameterize enzyme kinetics (Km, Vmax, Ki)
+   - Set initial metabolite concentrations
+
+2. Simulate pathway dynamics:
+   - Steady-state analysis
+   - Time-course simulations
+   - Sensitivity analysis
+
+3. Constraint-based modeling:
+   - Flux balance analysis (FBA)
+   - Identify bottleneck reactions
+   - Predict metabolic engineering strategies
+
+4. Integrate with gene expression:
+   - Tissue-specific model predictions
+   - Disease vs. normal comparisons
+
+5. Therapeutic predictions:
+   - Enzyme inhibition effects
+   - Metabolic rescue strategies
+   - Drug target identification
+
+Generate model in SBML format and simulation results.
+""")
+```
+
+---
+
+## Best Practices for Task Formulation
+
+### 1. Be Specific and Detailed
+
+**Poor:**
+```python
+agent.go("Analyze this RNA-seq data")
+```
+
+**Good:**
+```python
+agent.go("""
+Analyze bulk RNA-seq data from cancer vs. normal samples.
+
+Files: cancer_rnaseq.csv (raw gene counts, 50 cancer, 50 normal)
+
+Tasks:
+1. Differential expression (DESeq2, padj < 0.05, |log2FC| > 1)
+2. Pathway enrichment (KEGG, Reactome)
+3. Generate volcano plot and top DE gene heatmap
+""")
+```
+
+### 2. Include File Paths and Formats
+
+Always specify:
+- Exact file paths
+- File formats (VCF, BAM, CSV, H5AD, etc.)
+- Data structure (columns, sample IDs)
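+
+For example, a fully specified request might look like this sketch (file
+names and column layout are placeholders for your own data):
+
+```python
+agent.go("""
+Analyze variants in 'data/cohort_chr22.vcf' (VCF v4.2, 500 samples).
+Sample metadata: 'data/metadata.csv' with columns sample_id,
+case_control_status, age, sex.
+Annotate variants with gnomAD allele frequencies and write any
+case-enriched variants to 'results/enriched_variants.csv'.
+""")
+```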
+
+### 3. Set Clear Success Criteria
+
+Define thresholds and cutoffs:
+- Statistical significance (P < 0.05, FDR < 0.1)
+- Fold change thresholds
+- Quality filters
+- Expected outputs
+
+### 4. Request Visualizations
+
+Explicitly ask for plots:
+- Volcano plots, MA plots
+- Heatmaps, PCA plots
+- Network diagrams
+- Manhattan plots
+
+### 5. Specify Biological Context
+
+Include:
+- Organism (human, mouse, etc.)
+- Tissue/cell type
+- Disease/condition
+- Treatment details
+
+### 6. Request Interpretations
+
+Ask the agent to:
+- Interpret biological significance
+- Suggest follow-up experiments
+- Identify limitations
+- Provide literature context
+
+---
+
+## Common Patterns
+
+### Data Quality Control
+
+```python
+"""
+Before analysis, perform quality control:
+1. Check for missing values
+2. Assess data distributions
+3. Identify outliers
+4. Generate QC report
+Only proceed with analysis if data passes QC.
+"""
+```
+
+### Iterative Refinement
+
+```python
+"""
+Perform analysis in stages:
+1. Initial exploratory analysis
+2. Based on results, refine parameters
+3. Focus on interesting findings
+4. Generate final report
+
+Show intermediate results for each stage.
+"""
+```
+
+### Reproducibility
+
+```python
+"""
+Ensure reproducibility:
+1. Set random seeds where applicable
+2. Log all parameters used
+3. Save intermediate files
+4. Export environment info (package versions)
+5. Generate methods section for paper
+"""
+```
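+
+### Putting It Together
+
+A minimal end-to-end sketch combining these patterns; the data path and
+model name follow the examples above, and the input file is a placeholder:
+
+```python
+from biomni.agent import A1
+
+agent = A1(path='./data', llm='claude-sonnet-4-20250514')
+
+agent.go("""
+Perform quality control on 'data/counts.csv' (genes x samples, raw counts):
+report missing values, library-size distributions, and outlier samples.
+If QC passes, run differential expression (padj < 0.05, |log2FC| > 1),
+set random seeds, log all parameters, and save outputs under 'results/'.
+""")
+
+# Export the full conversation and results for the record
+agent.save_conversation_history("analysis_report.pdf")
+```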
+
+These examples demonstrate the breadth of biomedical tasks biomni can handle. Adapt the patterns to your specific research questions, and always include sufficient detail for the agent to execute autonomously.
diff --git a/scientific-packages/biomni/scripts/generate_report.py b/scientific-packages/biomni/scripts/generate_report.py
old mode 100644
new mode 100755
index df09085..640556e
--- a/scientific-packages/biomni/scripts/generate_report.py
+++ b/scientific-packages/biomni/scripts/generate_report.py
@@ -1,381 +1,370 @@
 #!/usr/bin/env python3
 """
-Enhanced PDF Report Generation for Biomni
+Enhanced PDF report generation for biomni conversation histories.
 
-This script provides advanced PDF report generation with custom formatting,
-styling, and metadata for Biomni analysis results.
+This script provides additional customization options for biomni reports:
+- Custom styling and branding
+- Formatted code blocks
+- Section organization
+- Metadata inclusion
+- Export format options (PDF, HTML, Markdown)
+
+Usage:
+    python generate_report.py --input conversation.json --output report.md
+    python generate_report.py --input conversation.json --output report.html --format html
 """
 
 import argparse
-import sys
+import json
 from pathlib import Path
+from typing import Dict, List, Optional, Any
 from datetime import datetime
-from typing import Optional, Dict, Any
 
 
-def generate_markdown_report(
-    title: str,
-    sections: list,
-    metadata: Optional[Dict[str, Any]] = None,
-    output_path: str = "report.md"
+def format_conversation_history(
+    messages: List[Dict[str, Any]],
+    include_metadata: bool = True,
+    include_code: bool = True,
+    include_timestamps: bool = False
 ) -> str:
     """
-    Generate a formatted markdown report.
+    Format conversation history into structured markdown.
 
     Args:
-        title: Report title
-        sections: List of dicts with 'heading' and 'content' keys
-        metadata: Optional metadata dict (author, date, etc.)
-        output_path: Path to save markdown file
+        messages: List of conversation message dictionaries
+        include_metadata: Include metadata section
+        include_code: Include code blocks
+        include_timestamps: Include message timestamps
 
     Returns:
-        Path to generated markdown file
+        Formatted markdown string
     """
-    md_content = []
-
-    # Title
-    md_content.append(f"# {title}\n")
-
-    # Metadata
-    if metadata:
-        md_content.append("---\n")
-        for key, value in metadata.items():
-            md_content.append(f"**{key}:** {value} \n")
-        md_content.append("---\n\n")
-
-    # Sections
-    for section in sections:
-        heading = section.get('heading', 'Section')
-        content = section.get('content', '')
-        level = section.get('level', 2)  # Default to h2
-
-        md_content.append(f"{'#' * level} {heading}\n\n")
-        md_content.append(f"{content}\n\n")
-
-    # Write to file
-    output = Path(output_path)
-    output.write_text('\n'.join(md_content))
-
-    return str(output)
-
-
-def convert_to_pdf_weasyprint(
-    markdown_path: str,
-    output_path: str,
-    css_style: Optional[str] = None
-) -> bool:
-    """
-    Convert markdown to PDF using WeasyPrint.
-
-    Args:
-        markdown_path: Path to markdown file
-        output_path: Path for output PDF
-        css_style: Optional CSS stylesheet path
-
-    Returns:
-        True if successful, False otherwise
-    """
-    try:
-        import markdown
-        from weasyprint import HTML, CSS
-
-        # Read markdown
-        with open(markdown_path, 'r') as f:
-            md_content = f.read()
-
-        # Convert to HTML
-        html_content = markdown.markdown(
-            md_content,
-            extensions=['tables', 'fenced_code', 'codehilite']
-        )
-
-        # Wrap in HTML template
-        html_template = f"""
-        <!DOCTYPE html>
-        <html>
-        <head>
-            <meta charset="utf-8">
-            <title>Biomni Report</title>
-        </head>
-        <body>
-            {html_content}
-        </body>
-        </html>
-        """
-
-        # Generate PDF
-        pdf = HTML(string=html_template)
-
-        # Add custom CSS if provided
-        stylesheets = []
-        if css_style and Path(css_style).exists():
-            stylesheets.append(CSS(filename=css_style))
-
-        pdf.write_pdf(output_path, stylesheets=stylesheets)
-
-        return True
-
-    except ImportError:
-        print("Error: WeasyPrint not installed. Install with: pip install weasyprint")
-        return False
-    except Exception as e:
-        print(f"Error generating PDF: {e}")
-        return False
-
-
-def convert_to_pdf_pandoc(markdown_path: str, output_path: str) -> bool:
-    """
-    Convert markdown to PDF using Pandoc.
-
-    Args:
-        markdown_path: Path to markdown file
-        output_path: Path for output PDF
-
-    Returns:
-        True if successful, False otherwise
-    """
-    try:
-        import subprocess
-
-        # Check if pandoc is installed
-        result = subprocess.run(
-            ['pandoc', '--version'],
-            capture_output=True,
-            text=True
-        )
-
-        if result.returncode != 0:
-            print("Error: Pandoc not installed")
-            return False
-
-        # Convert with pandoc
-        result = subprocess.run(
-            [
-                'pandoc',
-                markdown_path,
-                '-o', output_path,
-                '--pdf-engine=pdflatex',
-                '-V', 'geometry:margin=1in',
-                '--toc'
-            ],
-            capture_output=True,
-            text=True
-        )
-
-        if result.returncode != 0:
-            print(f"Pandoc error: {result.stderr}")
-            return False
-
-        return True
-
-    except FileNotFoundError:
-        print("Error: Pandoc not found. Install from https://pandoc.org/")
-        return False
-    except Exception as e:
-        print(f"Error: {e}")
-        return False
-
-
-def create_biomni_report(
-    conversation_history: list,
-    output_path: str = "biomni_report.pdf",
-    method: str = "weasyprint"
-) -> bool:
-    """
-    Create a formatted PDF report from Biomni conversation history.
-
-    Args:
-        conversation_history: List of conversation turns
-        output_path: Output PDF path
-        method: Conversion method ('weasyprint' or 'pandoc')
-
-    Returns:
-        True if successful
-    """
-    # Prepare report sections
-    metadata = {
-        'Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-        'Tool': 'Biomni AI Agent',
-        'Report Type': 'Analysis Summary'
-    }
     sections = []
-    # Executive Summary
-    sections.append({
-        'heading': 'Executive Summary',
-        'level': 2,
-        'content': 'This report contains the complete analysis workflow executed by the Biomni biomedical AI agent.'
-    })
+    # Header
+    sections.append("# Biomni Analysis Report\n")
 
-    # Conversation history
-    for i, turn in enumerate(conversation_history, 1):
-        sections.append({
-            'heading': f'Task {i}: {turn.get("task", "Analysis")}',
-            'level': 2,
-            'content': f'**Input:**\n```\n{turn.get("input", "")}\n```\n\n**Output:**\n{turn.get("output", "")}'
-        })
+    # Metadata
+    if include_metadata:
+        sections.append("## Metadata\n")
+        sections.append(f"- **Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        sections.append(f"- **Number of interactions**: {len(messages)}")
+        sections.append("\n---\n")
+
+    # Process messages
+    sections.append("## Analysis\n")
+
+    for i, msg in enumerate(messages, 1):
+        role = msg.get('role', 'unknown')
+        content = msg.get('content', '')
+
+        if role == 'user':
+            sections.append(f"### Task {i // 2 + 1}\n")
+            sections.append(f"**Query:**\n```\n{content}\n```\n")
+
+        elif role == 'assistant':
+            sections.append(f"**Response:**\n")
+
+            # Check if content contains code
+            if include_code and ('```' in content or 'import ' in content):
+                # Attempt to separate text and code
+                parts = content.split('```')
+                for j, part in enumerate(parts):
+                    if j % 2 == 0:
+                        # Text content
+                        if part.strip():
+                            sections.append(f"{part.strip()}\n")
+                    else:
+                        # Code content
+                        # Check if language is specified
+                        lines = part.split('\n', 1)
+                        if len(lines) > 1 and lines[0].strip() in ['python', 'r', 'bash', 'sql']:
+                            lang = lines[0].strip()
+                            code = lines[1]
+                        else:
+                            lang = 'python'  # Default to python
+                            code = part
+
+                        sections.append(f"```{lang}\n{code}\n```\n")
+            else:
+                sections.append(f"{content}\n")
+
+        sections.append("\n---\n")
+
+    return '\n'.join(sections)
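+
+# Note: the Task numbering above assumes messages alternate strictly between
+# user and assistant turns; histories containing system or tool messages may
+# need custom grouping before formatting.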
+
+
+def markdown_to_html(markdown_content: str, title: str = "Biomni Report") -> str:
+    """
+    Convert markdown to styled HTML.
+
+    Args:
+        markdown_content: Markdown string
+        title: HTML page title
+
+    Returns:
+        HTML string
+    """
+    # Simple markdown to HTML conversion
+    # For production use, consider using a library like markdown or mistune
+
+    html_template = f"""<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <title>{title}</title>
+    <style>
+        body {{ font-family: sans-serif; max-width: 900px; margin: 2em auto; padding: 0 1em; }}
+        pre {{ background: #f5f5f5; padding: 1em; overflow-x: auto; }}
+        hr {{ border: none; border-top: 1px solid #ddd; }}
+    </style>
+</head>
+<body>
+<div class="container">
+    {markdown_to_html_simple(markdown_content)}
+</div>
+</body>
+</html>
+"""
+    return html_template
+
+
+def markdown_to_html_simple(md: str) -> str:
+    """Simple markdown to HTML converter (basic implementation)."""
+    lines = md.split('\n')
+    html_lines = []
+    in_code_block = False
+    in_list = False
+
+    for line in lines:
+        # Code blocks
+        if line.startswith('```'):
+            if in_code_block:
+                html_lines.append('</code></pre>')
+                in_code_block = False
+            else:
+                lang = line[3:].strip()
+                html_lines.append(f'<pre><code class="language-{lang}">')
+                in_code_block = True
+            continue
+
+        if in_code_block:
+            html_lines.append(line)
+            continue
+
+        # Headers
+        if line.startswith('# '):
+            html_lines.append(f'<h1>{line[2:]}</h1>')
+        elif line.startswith('## '):
+            html_lines.append(f'<h2>{line[3:]}</h2>')
+        elif line.startswith('### '):
+            html_lines.append(f'<h3>{line[4:]}</h3>')
+        # Lists
+        elif line.startswith('- '):
+            if not in_list:
+                html_lines.append('<ul>')
+                in_list = True
+            html_lines.append(f'<li>{line[2:]}</li>')
+        else:
+            if in_list:
+                html_lines.append('</ul>')
+                in_list = False
+
+            # Horizontal rule
+            if line.strip() == '---':
+                html_lines.append('<hr>')
+            # Bold
+            elif '**' in line:
+                line = line.replace('**', '<strong>', 1).replace('**', '</strong>', 1)
+                html_lines.append(f'<p>{line}</p>')
+            # Regular paragraph
+            elif line.strip():
+                html_lines.append(f'<p>{line}</p>')
+            else:
+                html_lines.append('<br>')
+
+    if in_list:
+        html_lines.append('</ul>')
+
+    return '\n'.join(html_lines)
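+
+# markdown_to_html_simple covers only the Markdown subset emitted by
+# format_conversation_history (headers, flat lists, bold, fenced code,
+# horizontal rules); nested lists and inline links pass through as plain text.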
+
+
+def generate_report(
+    conversation_data: Dict[str, Any],
+    output_path: Path,
+    format: str = 'markdown',
+    title: Optional[str] = None
+):
+    """
+    Generate formatted report from conversation data.
+
+    Args:
+        conversation_data: Conversation history dictionary
+        output_path: Output file path
+        format: Output format ('markdown', 'html', or 'pdf')
+        title: Report title
+    """
+    messages = conversation_data.get('messages', [])
+
+    if not title:
+        title = f"Biomni Analysis - {datetime.now().strftime('%Y-%m-%d')}"
 
     # Generate markdown
-    md_path = output_path.replace('.pdf', '.md')
-    generate_markdown_report(
-        title="Biomni Analysis Report",
-        sections=sections,
-        metadata=metadata,
-        output_path=md_path
-    )
+    markdown_content = format_conversation_history(messages)
+
+    if format == 'markdown':
+        output_path.write_text(markdown_content)
+        print(f"✓ Markdown report saved to {output_path}")
+
+    elif format == 'html':
+        html_content = markdown_to_html(markdown_content, title)
+        output_path.write_text(html_content)
+        print(f"✓ HTML report saved to {output_path}")
+
+    elif format == 'pdf':
+        # For PDF generation, we'd typically use a library like weasyprint or reportlab
+        # This is a placeholder implementation
+        print("PDF generation requires additional dependencies (weasyprint or reportlab)")
+        print("Falling back to HTML format...")
+
+        html_path = output_path.with_suffix('.html')
+        html_content = markdown_to_html(markdown_content, title)
+        html_path.write_text(html_content)
+
+        print(f"✓ HTML report saved to {html_path}")
+        print("  To convert to PDF:")
+        print(f"  1. Install weasyprint: pip install weasyprint")
+        print(f"  2. Run: weasyprint {html_path} {output_path}")
 
-    # Convert to PDF
-    if method == 'weasyprint':
-        success = convert_to_pdf_weasyprint(md_path, output_path)
-    elif method == 'pandoc':
-        success = convert_to_pdf_pandoc(md_path, output_path)
     else:
-        print(f"Unknown method: {method}")
-        return False
-
-    if success:
-        print(f"✓ Report generated: {output_path}")
-        print(f"  Markdown: {md_path}")
-    else:
-        print("✗ Failed to generate PDF")
-        print(f"  Markdown available: {md_path}")
-
-    return success
+        raise ValueError(f"Unsupported format: {format}")
 
 
 def main():
-    """CLI for report generation."""
+    """Main entry point for CLI usage."""
     parser = argparse.ArgumentParser(
-        description='Generate formatted PDF reports for Biomni analyses'
+        description="Generate enhanced reports from biomni conversation histories"
     )
     parser.add_argument(
-        'input',
-        type=str,
-        help='Input markdown file or conversation history'
+        '--input',
+        type=Path,
+        required=True,
+        help='Input conversation history JSON file'
     )
     parser.add_argument(
-        '-o', '--output',
-        type=str,
-        default='biomni_report.pdf',
-        help='Output PDF path (default: biomni_report.pdf)'
+        '--output',
+        type=Path,
+        required=True,
+        help='Output report file path'
     )
     parser.add_argument(
-        '-m', '--method',
-        type=str,
-        choices=['weasyprint', 'pandoc'],
-        default='weasyprint',
-        help='Conversion method (default: weasyprint)'
+        '--format',
+        choices=['markdown', 'html', 'pdf'],
+        default='markdown',
+        help='Output format (default: markdown)'
    )
     parser.add_argument(
-        '--css',
+        '--title',
         type=str,
-        help='Custom CSS stylesheet path'
+        help='Report title (optional)'
    )
 
     args = parser.parse_args()
 
-    # Check if input is markdown or conversation history
-    input_path = Path(args.input)
-
-    if not input_path.exists():
-        print(f"Error: Input file not found: {args.input}")
-        
return 1 - - # If input is markdown, convert directly - if input_path.suffix == '.md': - if args.method == 'weasyprint': - success = convert_to_pdf_weasyprint( - str(input_path), - args.output, - args.css - ) - else: - success = convert_to_pdf_pandoc(str(input_path), args.output) - - return 0 if success else 1 - - # Otherwise, assume it's conversation history (JSON) + # Load conversation data try: - import json - with open(input_path) as f: - history = json.load(f) - - success = create_biomni_report( - history, - args.output, - args.method - ) - - return 0 if success else 1 - + with open(args.input, 'r') as f: + conversation_data = json.load(f) + except FileNotFoundError: + print(f"❌ Input file not found: {args.input}") + return 1 except json.JSONDecodeError: - print("Error: Input file is not valid JSON or markdown") + print(f"❌ Invalid JSON in input file: {args.input}") + return 1 + + # Generate report + try: + generate_report( + conversation_data, + args.output, + format=args.format, + title=args.title + ) + return 0 + except Exception as e: + print(f"❌ Error generating report: {e}") return 1 -if __name__ == "__main__": +if __name__ == '__main__': + import sys sys.exit(main()) diff --git a/scientific-packages/biomni/scripts/setup_environment.py b/scientific-packages/biomni/scripts/setup_environment.py old mode 100644 new mode 100755 index cf3e1f2..d8dfe2e --- a/scientific-packages/biomni/scripts/setup_environment.py +++ b/scientific-packages/biomni/scripts/setup_environment.py @@ -1,230 +1,355 @@ #!/usr/bin/env python3 """ -Biomni Environment Setup and Validation Script +Interactive setup script for biomni environment configuration. -This script helps users set up and validate their Biomni environment, -including checking dependencies, API keys, and data availability. +This script helps users set up: +1. Conda environment with required dependencies +2. API keys for LLM providers +3. Data lake directory configuration +4. 
MCP server setup (optional) + +Usage: + python setup_environment.py """ import os import sys import subprocess from pathlib import Path -from typing import Dict, List, Tuple +from typing import Dict, Optional -def check_python_version() -> Tuple[bool, str]: - """Check if Python version is compatible.""" - version = sys.version_info - if version.major == 3 and version.minor >= 8: - return True, f"Python {version.major}.{version.minor}.{version.micro} ✓" - else: - return False, f"Python {version.major}.{version.minor} - requires Python 3.8+" - - -def check_conda_env() -> Tuple[bool, str]: - """Check if running in biomni conda environment.""" - conda_env = os.environ.get('CONDA_DEFAULT_ENV', None) - if conda_env == 'biomni_e1': - return True, f"Conda environment: {conda_env} ✓" - else: - return False, f"Not in biomni_e1 environment (current: {conda_env})" - - -def check_package_installed(package: str) -> bool: - """Check if a Python package is installed.""" +def check_conda_installed() -> bool: + """Check if conda is available in the system.""" try: - __import__(package) + subprocess.run( + ['conda', '--version'], + capture_output=True, + check=True + ) return True - except ImportError: + except (subprocess.CalledProcessError, FileNotFoundError): return False -def check_dependencies() -> Tuple[bool, List[str]]: - """Check for required and optional dependencies.""" - required = ['biomni'] - optional = ['weasyprint', 'markdown2pdf'] +def setup_conda_environment(): + """Guide user through conda environment setup.""" + print("\n=== Conda Environment Setup ===") - missing_required = [pkg for pkg in required if not check_package_installed(pkg)] - missing_optional = [pkg for pkg in optional if not check_package_installed(pkg)] + if not check_conda_installed(): + print("❌ Conda not found. Please install Miniconda or Anaconda:") + print(" https://docs.conda.io/en/latest/miniconda.html") + return False - messages = [] - success = len(missing_required) == 0 + print("✓ Conda is installed") - if missing_required: - messages.append(f"Missing required packages: {', '.join(missing_required)}") - messages.append("Install with: pip install biomni --upgrade") + # Check if biomni_e1 environment exists + result = subprocess.run( + ['conda', 'env', 'list'], + capture_output=True, + text=True + ) + + if 'biomni_e1' in result.stdout: + print("✓ biomni_e1 environment already exists") + return True + + print("\nCreating biomni_e1 conda environment...") + print("This will install Python 3.10 and required dependencies.") + + response = input("Proceed? [y/N]: ").strip().lower() + if response != 'y': + print("Skipping conda environment setup") + return False + + try: + # Create conda environment + subprocess.run( + ['conda', 'create', '-n', 'biomni_e1', 'python=3.10', '-y'], + check=True + ) + + print("\n✓ Conda environment created successfully") + print("\nTo activate: conda activate biomni_e1") + print("Then install biomni: pip install biomni --upgrade") + return True + + except subprocess.CalledProcessError as e: + print(f"❌ Failed to create conda environment: {e}") + return False + + +def setup_api_keys() -> Dict[str, str]: + """Interactive API key configuration.""" + print("\n=== API Key Configuration ===") + print("Biomni supports multiple LLM providers.") + print("At minimum, configure one provider.") + + api_keys = {} + + # Anthropic (recommended) + print("\n1. 
Anthropic Claude (Recommended)") + print(" Get your API key from: https://console.anthropic.com/") + anthropic_key = input(" Enter ANTHROPIC_API_KEY (or press Enter to skip): ").strip() + if anthropic_key: + api_keys['ANTHROPIC_API_KEY'] = anthropic_key + + # OpenAI + print("\n2. OpenAI") + print(" Get your API key from: https://platform.openai.com/api-keys") + openai_key = input(" Enter OPENAI_API_KEY (or press Enter to skip): ").strip() + if openai_key: + api_keys['OPENAI_API_KEY'] = openai_key + + # Google Gemini + print("\n3. Google Gemini") + print(" Get your API key from: https://makersuite.google.com/app/apikey") + google_key = input(" Enter GOOGLE_API_KEY (or press Enter to skip): ").strip() + if google_key: + api_keys['GOOGLE_API_KEY'] = google_key + + # Groq + print("\n4. Groq") + print(" Get your API key from: https://console.groq.com/keys") + groq_key = input(" Enter GROQ_API_KEY (or press Enter to skip): ").strip() + if groq_key: + api_keys['GROQ_API_KEY'] = groq_key + + if not api_keys: + print("\n⚠️ No API keys configured. You'll need at least one to use biomni.") + return {} + + return api_keys + + +def save_api_keys(api_keys: Dict[str, str], method: str = 'env_file'): + """Save API keys using specified method.""" + if method == 'env_file': + env_file = Path.cwd() / '.env' + + # Read existing .env if present + existing_vars = {} + if env_file.exists(): + with open(env_file, 'r') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#'): + if '=' in line: + key, val = line.split('=', 1) + existing_vars[key.strip()] = val.strip() + + # Update with new keys + existing_vars.update(api_keys) + + # Write to .env + with open(env_file, 'w') as f: + f.write("# Biomni API Keys\n") + f.write(f"# Generated by setup_environment.py\n\n") + for key, value in existing_vars.items(): + f.write(f"{key}={value}\n") + + print(f"\n✓ API keys saved to {env_file}") + print(" Keys will be loaded automatically when biomni runs in this directory") + + elif method == 'shell_export': + shell_file = Path.home() / '.bashrc' # or .zshrc for zsh users + + print("\n📋 Add these lines to your shell configuration:") + for key, value in api_keys.items(): + print(f" export {key}=\"{value}\"") + + print(f"\nThen run: source {shell_file}") + + +def setup_data_directory() -> Optional[Path]: + """Configure biomni data lake directory.""" + print("\n=== Data Lake Configuration ===") + print("Biomni requires ~11GB for integrated biomedical databases.") + + default_path = Path.cwd() / 'biomni_data' + print(f"\nDefault location: {default_path}") + + response = input("Use default location? 
[Y/n]: ").strip().lower() + + if response == 'n': + custom_path = input("Enter custom path: ").strip() + data_path = Path(custom_path).expanduser().resolve() else: - messages.append("Required packages: ✓") + data_path = default_path - if missing_optional: - messages.append(f"Missing optional packages: {', '.join(missing_optional)}") - messages.append("For PDF reports, install: pip install weasyprint") + # Create directory if it doesn't exist + data_path.mkdir(parents=True, exist_ok=True) - return success, messages + print(f"\n✓ Data directory configured: {data_path}") + print(" Data will be downloaded automatically on first use") + + return data_path -def check_api_keys() -> Tuple[bool, Dict[str, bool]]: - """Check which API keys are configured.""" - api_keys = { - 'ANTHROPIC_API_KEY': os.environ.get('ANTHROPIC_API_KEY'), - 'OPENAI_API_KEY': os.environ.get('OPENAI_API_KEY'), - 'GEMINI_API_KEY': os.environ.get('GEMINI_API_KEY'), - 'GROQ_API_KEY': os.environ.get('GROQ_API_KEY'), - } +def test_installation(data_path: Path): + """Test biomni installation with a simple query.""" + print("\n=== Installation Test ===") + print("Testing biomni installation with a simple query...") - configured = {key: bool(value) for key, value in api_keys.items()} - has_any = any(configured.values()) + response = input("Run test? [Y/n]: ").strip().lower() + if response == 'n': + print("Skipping test") + return - return has_any, configured + test_code = f''' +import os +from biomni.agent import A1 +# Use environment variables for API keys +agent = A1(path='{data_path}', llm='claude-sonnet-4-20250514') -def check_data_directory(data_path: str = './data') -> Tuple[bool, str]: - """Check if Biomni data directory exists and has content.""" - path = Path(data_path) +# Simple test query +result = agent.go("What is the primary function of the TP53 gene?") +print("Test result:", result) +''' - if not path.exists(): - return False, f"Data directory not found at {data_path}" + test_file = Path('test_biomni.py') + with open(test_file, 'w') as f: + f.write(test_code) - # Check if directory has files (data has been downloaded) - files = list(path.glob('*')) - if len(files) == 0: - return False, f"Data directory exists but is empty. Run agent once to download." + print(f"\nTest script created: {test_file}") + print("Running test...") - # Rough size check (should be ~11GB) - total_size = sum(f.stat().st_size for f in path.rglob('*') if f.is_file()) - size_gb = total_size / (1024**3) - - if size_gb < 1: - return False, f"Data directory exists but seems incomplete ({size_gb:.1f} GB)" - - return True, f"Data directory: {data_path} ({size_gb:.1f} GB) ✓" - - -def check_disk_space(required_gb: float = 20) -> Tuple[bool, str]: - """Check if sufficient disk space is available.""" try: - import shutil - stat = shutil.disk_usage('.') - free_gb = stat.free / (1024**3) - - if free_gb >= required_gb: - return True, f"Disk space: {free_gb:.1f} GB available ✓" - else: - return False, f"Low disk space: {free_gb:.1f} GB (need {required_gb} GB)" - except Exception as e: - return False, f"Could not check disk space: {e}" + subprocess.run([sys.executable, str(test_file)], check=True) + print("\n✓ Test completed successfully!") + test_file.unlink() # Clean up test file + except subprocess.CalledProcessError: + print("\n❌ Test failed. 
Check your configuration.") + print(f" Test script saved as {test_file} for debugging") -def test_biomni_import() -> Tuple[bool, str]: - """Test if Biomni can be imported and initialized.""" - try: - from biomni.agent import A1 - from biomni.config import default_config - return True, "Biomni import successful ✓" - except ImportError as e: - return False, f"Cannot import Biomni: {e}" - except Exception as e: - return False, f"Biomni import error: {e}" +def generate_example_script(data_path: Path): + """Generate example usage script.""" + example_code = f'''#!/usr/bin/env python3 +""" +Example biomni usage script +This demonstrates basic biomni usage patterns. +Modify this script for your research tasks. +""" -def suggest_fixes(results: Dict[str, Tuple[bool, any]]) -> List[str]: - """Generate suggestions for fixing issues.""" - suggestions = [] +from biomni.agent import A1 - if not results['python'][0]: - suggestions.append("➜ Upgrade Python to 3.8 or higher") +# Initialize agent +agent = A1( + path='{data_path}', + llm='claude-sonnet-4-20250514' # or your preferred LLM +) - if not results['conda'][0]: - suggestions.append("➜ Activate biomni environment: conda activate biomni_e1") +# Example 1: Simple gene query +print("Example 1: Gene function query") +result = agent.go(""" +What are the main functions of the BRCA1 gene? +Include information about: +- Molecular function +- Associated diseases +- Protein interactions +""") +print(result) +print("-" * 80) - if not results['dependencies'][0]: - suggestions.append("➜ Install Biomni: pip install biomni --upgrade") +# Example 2: Data analysis +print("\\nExample 2: GWAS analysis") +result = agent.go(""" +Explain how to analyze GWAS summary statistics for: +1. Identifying genome-wide significant variants +2. Mapping variants to genes +3. 
Pathway enrichment analysis +""") +print(result) - if not results['api_keys'][0]: - suggestions.append("➜ Set API key: export ANTHROPIC_API_KEY='your-key'") - suggestions.append(" Or create .env file with API keys") +# Save conversation history +agent.save_conversation_history("example_results.pdf") +print("\\nResults saved to example_results.pdf") +''' - if not results['data'][0]: - suggestions.append("➜ Data will auto-download on first agent.go() call") + example_file = Path('example_biomni_usage.py') + with open(example_file, 'w') as f: + f.write(example_code) - if not results['disk_space'][0]: - suggestions.append("➜ Free up disk space (need ~20GB total)") - - return suggestions + print(f"\n✓ Example script created: {example_file}") def main(): - """Run all environment checks and display results.""" + """Main setup workflow.""" print("=" * 60) - print("Biomni Environment Validation") + print("Biomni Environment Setup") print("=" * 60) - print() - # Run all checks - results = {} + # Step 1: Conda environment + conda_success = setup_conda_environment() - print("Checking Python version...") - results['python'] = check_python_version() - print(f" {results['python'][1]}") - print() + if conda_success: + print("\n⚠️ Remember to activate the environment:") + print(" conda activate biomni_e1") + print(" pip install biomni --upgrade") - print("Checking conda environment...") - results['conda'] = check_conda_env() - print(f" {results['conda'][1]}") - print() + # Step 2: API keys + api_keys = setup_api_keys() - print("Checking dependencies...") - results['dependencies'] = check_dependencies() - for msg in results['dependencies'][1]: - print(f" {msg}") - print() + if api_keys: + print("\nHow would you like to store API keys?") + print("1. .env file (recommended, local to this directory)") + print("2. Shell export (add to .bashrc/.zshrc)") - print("Checking API keys...") - results['api_keys'] = check_api_keys() - has_keys, key_status = results['api_keys'] - for key, configured in key_status.items(): - status = "✓" if configured else "✗" - print(f" {key}: {status}") - print() + choice = input("Choose [1/2]: ").strip() - print("Checking Biomni data directory...") - results['data'] = check_data_directory() - print(f" {results['data'][1]}") - print() + if choice == '2': + save_api_keys(api_keys, method='shell_export') + else: + save_api_keys(api_keys, method='env_file') - print("Checking disk space...") - results['disk_space'] = check_disk_space() - print(f" {results['disk_space'][1]}") - print() + # Step 3: Data directory + data_path = setup_data_directory() - print("Testing Biomni import...") - results['biomni_import'] = test_biomni_import() - print(f" {results['biomni_import'][1]}") - print() + # Step 4: Generate example script + if data_path: + generate_example_script(data_path) + + # Step 5: Test installation (optional) + if api_keys and data_path: + test_installation(data_path) # Summary + print("\n" + "=" * 60) + print("Setup Complete!") print("=" * 60) - all_passed = all(result[0] for result in results.values()) - if all_passed: - print("✓ All checks passed! 
Environment is ready.") - print() - print("Quick start:") - print(" from biomni.agent import A1") - print(" agent = A1(path='./data', llm='claude-sonnet-4-20250514')") - print(" agent.go('Your biomedical task')") + if conda_success: + print("✓ Conda environment: biomni_e1") + + if api_keys: + print(f"✓ API keys configured: {', '.join(api_keys.keys())}") + + if data_path: + print(f"✓ Data directory: {data_path}") + + print("\nNext steps:") + if conda_success: + print("1. conda activate biomni_e1") + print("2. pip install biomni --upgrade") + print("3. Run example_biomni_usage.py to test") else: - print("⚠ Some checks failed. See suggestions below:") - print() - suggestions = suggest_fixes(results) - for suggestion in suggestions: - print(suggestion) + print("1. Install conda/miniconda") + print("2. Run this script again") - print("=" * 60) - - return 0 if all_passed else 1 + print("\nFor documentation, see:") + print(" - GitHub: https://github.com/snap-stanford/biomni") + print(" - Paper: https://www.biorxiv.org/content/10.1101/2025.05.30.656746v1") if __name__ == "__main__": - sys.exit(main()) + try: + main() + except KeyboardInterrupt: + print("\n\nSetup interrupted by user") + sys.exit(1) + except Exception as e: + print(f"\n❌ Error during setup: {e}") + sys.exit(1)