Improved Biomni support

This commit is contained in:
Timothy Kassis
2025-10-22 08:38:06 -07:00
parent 71a3c3750f
commit 77822efeed
9 changed files with 2512 additions and 3393 deletions

457
scientific-packages/biomni/scripts/setup_environment.py Normal file → Executable file
View File

@@ -1,230 +1,355 @@
#!/usr/bin/env python3
"""
Biomni Environment Setup and Validation Script
Interactive setup script for biomni environment configuration.
This script helps users set up and validate their Biomni environment,
including checking dependencies, API keys, and data availability.
This script helps users set up:
1. Conda environment with required dependencies
2. API keys for LLM providers
3. Data lake directory configuration
4. MCP server setup (optional)
Usage:
python setup_environment.py
"""
import os
import sys
import subprocess
from pathlib import Path
from typing import Dict, List, Tuple
from typing import Dict, Optional
def check_python_version() -> Tuple[bool, str]:
"""Check if Python version is compatible."""
version = sys.version_info
if version.major == 3 and version.minor >= 8:
return True, f"Python {version.major}.{version.minor}.{version.micro}"
else:
return False, f"Python {version.major}.{version.minor} - requires Python 3.8+"
def check_conda_env() -> Tuple[bool, str]:
"""Check if running in biomni conda environment."""
conda_env = os.environ.get('CONDA_DEFAULT_ENV', None)
if conda_env == 'biomni_e1':
return True, f"Conda environment: {conda_env}"
else:
return False, f"Not in biomni_e1 environment (current: {conda_env})"
def check_package_installed(package: str) -> bool:
"""Check if a Python package is installed."""
def check_conda_installed() -> bool:
"""Check if conda is available in the system."""
try:
__import__(package)
subprocess.run(
['conda', '--version'],
capture_output=True,
check=True
)
return True
except ImportError:
except (subprocess.CalledProcessError, FileNotFoundError):
return False
def check_dependencies() -> Tuple[bool, List[str]]:
"""Check for required and optional dependencies."""
required = ['biomni']
optional = ['weasyprint', 'markdown2pdf']
def setup_conda_environment():
"""Guide user through conda environment setup."""
print("\n=== Conda Environment Setup ===")
missing_required = [pkg for pkg in required if not check_package_installed(pkg)]
missing_optional = [pkg for pkg in optional if not check_package_installed(pkg)]
if not check_conda_installed():
print("❌ Conda not found. Please install Miniconda or Anaconda:")
print(" https://docs.conda.io/en/latest/miniconda.html")
return False
messages = []
success = len(missing_required) == 0
print("✓ Conda is installed")
if missing_required:
messages.append(f"Missing required packages: {', '.join(missing_required)}")
messages.append("Install with: pip install biomni --upgrade")
# Check if biomni_e1 environment exists
result = subprocess.run(
['conda', 'env', 'list'],
capture_output=True,
text=True
)
if 'biomni_e1' in result.stdout:
print("✓ biomni_e1 environment already exists")
return True
print("\nCreating biomni_e1 conda environment...")
print("This will install Python 3.10 and required dependencies.")
response = input("Proceed? [y/N]: ").strip().lower()
if response != 'y':
print("Skipping conda environment setup")
return False
try:
# Create conda environment
subprocess.run(
['conda', 'create', '-n', 'biomni_e1', 'python=3.10', '-y'],
check=True
)
print("\n✓ Conda environment created successfully")
print("\nTo activate: conda activate biomni_e1")
print("Then install biomni: pip install biomni --upgrade")
return True
except subprocess.CalledProcessError as e:
print(f"❌ Failed to create conda environment: {e}")
return False
def setup_api_keys() -> Dict[str, str]:
"""Interactive API key configuration."""
print("\n=== API Key Configuration ===")
print("Biomni supports multiple LLM providers.")
print("At minimum, configure one provider.")
api_keys = {}
# Anthropic (recommended)
print("\n1. Anthropic Claude (Recommended)")
print(" Get your API key from: https://console.anthropic.com/")
anthropic_key = input(" Enter ANTHROPIC_API_KEY (or press Enter to skip): ").strip()
if anthropic_key:
api_keys['ANTHROPIC_API_KEY'] = anthropic_key
# OpenAI
print("\n2. OpenAI")
print(" Get your API key from: https://platform.openai.com/api-keys")
openai_key = input(" Enter OPENAI_API_KEY (or press Enter to skip): ").strip()
if openai_key:
api_keys['OPENAI_API_KEY'] = openai_key
# Google Gemini
print("\n3. Google Gemini")
print(" Get your API key from: https://makersuite.google.com/app/apikey")
google_key = input(" Enter GOOGLE_API_KEY (or press Enter to skip): ").strip()
if google_key:
api_keys['GOOGLE_API_KEY'] = google_key
# Groq
print("\n4. Groq")
print(" Get your API key from: https://console.groq.com/keys")
groq_key = input(" Enter GROQ_API_KEY (or press Enter to skip): ").strip()
if groq_key:
api_keys['GROQ_API_KEY'] = groq_key
if not api_keys:
print("\n⚠️ No API keys configured. You'll need at least one to use biomni.")
return {}
return api_keys
def save_api_keys(api_keys: Dict[str, str], method: str = 'env_file'):
"""Save API keys using specified method."""
if method == 'env_file':
env_file = Path.cwd() / '.env'
# Read existing .env if present
existing_vars = {}
if env_file.exists():
with open(env_file, 'r') as f:
for line in f:
line = line.strip()
if line and not line.startswith('#'):
if '=' in line:
key, val = line.split('=', 1)
existing_vars[key.strip()] = val.strip()
# Update with new keys
existing_vars.update(api_keys)
# Write to .env
with open(env_file, 'w') as f:
f.write("# Biomni API Keys\n")
f.write(f"# Generated by setup_environment.py\n\n")
for key, value in existing_vars.items():
f.write(f"{key}={value}\n")
print(f"\n✓ API keys saved to {env_file}")
print(" Keys will be loaded automatically when biomni runs in this directory")
elif method == 'shell_export':
shell_file = Path.home() / '.bashrc' # or .zshrc for zsh users
print("\n📋 Add these lines to your shell configuration:")
for key, value in api_keys.items():
print(f" export {key}=\"{value}\"")
print(f"\nThen run: source {shell_file}")
def setup_data_directory() -> Optional[Path]:
"""Configure biomni data lake directory."""
print("\n=== Data Lake Configuration ===")
print("Biomni requires ~11GB for integrated biomedical databases.")
default_path = Path.cwd() / 'biomni_data'
print(f"\nDefault location: {default_path}")
response = input("Use default location? [Y/n]: ").strip().lower()
if response == 'n':
custom_path = input("Enter custom path: ").strip()
data_path = Path(custom_path).expanduser().resolve()
else:
messages.append("Required packages: ✓")
data_path = default_path
if missing_optional:
messages.append(f"Missing optional packages: {', '.join(missing_optional)}")
messages.append("For PDF reports, install: pip install weasyprint")
# Create directory if it doesn't exist
data_path.mkdir(parents=True, exist_ok=True)
return success, messages
print(f"\n✓ Data directory configured: {data_path}")
print(" Data will be downloaded automatically on first use")
return data_path
def check_api_keys() -> Tuple[bool, Dict[str, bool]]:
"""Check which API keys are configured."""
api_keys = {
'ANTHROPIC_API_KEY': os.environ.get('ANTHROPIC_API_KEY'),
'OPENAI_API_KEY': os.environ.get('OPENAI_API_KEY'),
'GEMINI_API_KEY': os.environ.get('GEMINI_API_KEY'),
'GROQ_API_KEY': os.environ.get('GROQ_API_KEY'),
}
def test_installation(data_path: Path):
"""Test biomni installation with a simple query."""
print("\n=== Installation Test ===")
print("Testing biomni installation with a simple query...")
configured = {key: bool(value) for key, value in api_keys.items()}
has_any = any(configured.values())
response = input("Run test? [Y/n]: ").strip().lower()
if response == 'n':
print("Skipping test")
return
return has_any, configured
test_code = f'''
import os
from biomni.agent import A1
# Use environment variables for API keys
agent = A1(path='{data_path}', llm='claude-sonnet-4-20250514')
def check_data_directory(data_path: str = './data') -> Tuple[bool, str]:
"""Check if Biomni data directory exists and has content."""
path = Path(data_path)
# Simple test query
result = agent.go("What is the primary function of the TP53 gene?")
print("Test result:", result)
'''
if not path.exists():
return False, f"Data directory not found at {data_path}"
test_file = Path('test_biomni.py')
with open(test_file, 'w') as f:
f.write(test_code)
# Check if directory has files (data has been downloaded)
files = list(path.glob('*'))
if len(files) == 0:
return False, f"Data directory exists but is empty. Run agent once to download."
print(f"\nTest script created: {test_file}")
print("Running test...")
# Rough size check (should be ~11GB)
total_size = sum(f.stat().st_size for f in path.rglob('*') if f.is_file())
size_gb = total_size / (1024**3)
if size_gb < 1:
return False, f"Data directory exists but seems incomplete ({size_gb:.1f} GB)"
return True, f"Data directory: {data_path} ({size_gb:.1f} GB) ✓"
def check_disk_space(required_gb: float = 20) -> Tuple[bool, str]:
"""Check if sufficient disk space is available."""
try:
import shutil
stat = shutil.disk_usage('.')
free_gb = stat.free / (1024**3)
if free_gb >= required_gb:
return True, f"Disk space: {free_gb:.1f} GB available ✓"
else:
return False, f"Low disk space: {free_gb:.1f} GB (need {required_gb} GB)"
except Exception as e:
return False, f"Could not check disk space: {e}"
subprocess.run([sys.executable, str(test_file)], check=True)
print("\n✓ Test completed successfully!")
test_file.unlink() # Clean up test file
except subprocess.CalledProcessError:
print("\n❌ Test failed. Check your configuration.")
print(f" Test script saved as {test_file} for debugging")
def test_biomni_import() -> Tuple[bool, str]:
"""Test if Biomni can be imported and initialized."""
try:
from biomni.agent import A1
from biomni.config import default_config
return True, "Biomni import successful ✓"
except ImportError as e:
return False, f"Cannot import Biomni: {e}"
except Exception as e:
return False, f"Biomni import error: {e}"
def generate_example_script(data_path: Path):
"""Generate example usage script."""
example_code = f'''#!/usr/bin/env python3
"""
Example biomni usage script
This demonstrates basic biomni usage patterns.
Modify this script for your research tasks.
"""
def suggest_fixes(results: Dict[str, Tuple[bool, any]]) -> List[str]:
"""Generate suggestions for fixing issues."""
suggestions = []
from biomni.agent import A1
if not results['python'][0]:
suggestions.append("➜ Upgrade Python to 3.8 or higher")
# Initialize agent
agent = A1(
path='{data_path}',
llm='claude-sonnet-4-20250514' # or your preferred LLM
)
if not results['conda'][0]:
suggestions.append("➜ Activate biomni environment: conda activate biomni_e1")
# Example 1: Simple gene query
print("Example 1: Gene function query")
result = agent.go("""
What are the main functions of the BRCA1 gene?
Include information about:
- Molecular function
- Associated diseases
- Protein interactions
""")
print(result)
print("-" * 80)
if not results['dependencies'][0]:
suggestions.append("➜ Install Biomni: pip install biomni --upgrade")
# Example 2: Data analysis
print("\\nExample 2: GWAS analysis")
result = agent.go("""
Explain how to analyze GWAS summary statistics for:
1. Identifying genome-wide significant variants
2. Mapping variants to genes
3. Pathway enrichment analysis
""")
print(result)
if not results['api_keys'][0]:
suggestions.append("➜ Set API key: export ANTHROPIC_API_KEY='your-key'")
suggestions.append(" Or create .env file with API keys")
# Save conversation history
agent.save_conversation_history("example_results.pdf")
print("\\nResults saved to example_results.pdf")
'''
if not results['data'][0]:
suggestions.append("➜ Data will auto-download on first agent.go() call")
example_file = Path('example_biomni_usage.py')
with open(example_file, 'w') as f:
f.write(example_code)
if not results['disk_space'][0]:
suggestions.append("➜ Free up disk space (need ~20GB total)")
return suggestions
print(f"\n✓ Example script created: {example_file}")
def main():
"""Run all environment checks and display results."""
"""Main setup workflow."""
print("=" * 60)
print("Biomni Environment Validation")
print("Biomni Environment Setup")
print("=" * 60)
print()
# Run all checks
results = {}
# Step 1: Conda environment
conda_success = setup_conda_environment()
print("Checking Python version...")
results['python'] = check_python_version()
print(f" {results['python'][1]}")
print()
if conda_success:
print("\n⚠️ Remember to activate the environment:")
print(" conda activate biomni_e1")
print(" pip install biomni --upgrade")
print("Checking conda environment...")
results['conda'] = check_conda_env()
print(f" {results['conda'][1]}")
print()
# Step 2: API keys
api_keys = setup_api_keys()
print("Checking dependencies...")
results['dependencies'] = check_dependencies()
for msg in results['dependencies'][1]:
print(f" {msg}")
print()
if api_keys:
print("\nHow would you like to store API keys?")
print("1. .env file (recommended, local to this directory)")
print("2. Shell export (add to .bashrc/.zshrc)")
print("Checking API keys...")
results['api_keys'] = check_api_keys()
has_keys, key_status = results['api_keys']
for key, configured in key_status.items():
status = "" if configured else ""
print(f" {key}: {status}")
print()
choice = input("Choose [1/2]: ").strip()
print("Checking Biomni data directory...")
results['data'] = check_data_directory()
print(f" {results['data'][1]}")
print()
if choice == '2':
save_api_keys(api_keys, method='shell_export')
else:
save_api_keys(api_keys, method='env_file')
print("Checking disk space...")
results['disk_space'] = check_disk_space()
print(f" {results['disk_space'][1]}")
print()
# Step 3: Data directory
data_path = setup_data_directory()
print("Testing Biomni import...")
results['biomni_import'] = test_biomni_import()
print(f" {results['biomni_import'][1]}")
print()
# Step 4: Generate example script
if data_path:
generate_example_script(data_path)
# Step 5: Test installation (optional)
if api_keys and data_path:
test_installation(data_path)
# Summary
print("\n" + "=" * 60)
print("Setup Complete!")
print("=" * 60)
all_passed = all(result[0] for result in results.values())
if all_passed:
print("All checks passed! Environment is ready.")
print()
print("Quick start:")
print(" from biomni.agent import A1")
print(" agent = A1(path='./data', llm='claude-sonnet-4-20250514')")
print(" agent.go('Your biomedical task')")
if conda_success:
print("Conda environment: biomni_e1")
if api_keys:
print(f"✓ API keys configured: {', '.join(api_keys.keys())}")
if data_path:
print(f"✓ Data directory: {data_path}")
print("\nNext steps:")
if conda_success:
print("1. conda activate biomni_e1")
print("2. pip install biomni --upgrade")
print("3. Run example_biomni_usage.py to test")
else:
print("⚠ Some checks failed. See suggestions below:")
print()
suggestions = suggest_fixes(results)
for suggestion in suggestions:
print(suggestion)
print("1. Install conda/miniconda")
print("2. Run this script again")
print("=" * 60)
return 0 if all_passed else 1
print("\nFor documentation, see:")
print(" - GitHub: https://github.com/snap-stanford/biomni")
print(" - Paper: https://www.biorxiv.org/content/10.1101/2025.05.30.656746v1")
if __name__ == "__main__":
sys.exit(main())
try:
main()
except KeyboardInterrupt:
print("\n\nSetup interrupted by user")
sys.exit(1)
except Exception as e:
print(f"\n❌ Error during setup: {e}")
sys.exit(1)