Improved Biomni support

This commit is contained in:
Timothy Kassis
2025-10-22 08:38:06 -07:00
parent 71a3c3750f
commit 77822efeed
9 changed files with 2512 additions and 3393 deletions

649
scientific-packages/biomni/scripts/generate_report.py Normal file → Executable file
View File

@@ -1,381 +1,370 @@
#!/usr/bin/env python3
"""
Enhanced PDF Report Generation for Biomni
Enhanced PDF report generation for biomni conversation histories.
This script provides advanced PDF report generation with custom formatting,
styling, and metadata for Biomni analysis results.
This script provides additional customization options for biomni reports:
- Custom styling and branding
- Formatted code blocks
- Section organization
- Metadata inclusion
- Export format options (PDF, HTML, Markdown)
Usage:
python generate_report.py --input conversation.json --output report.pdf
python generate_report.py --agent-object agent --output report.pdf --format html
"""
import argparse
import sys
import json
from pathlib import Path
from typing import Dict, List, Optional, Any
from datetime import datetime
from typing import Optional, Dict, Any
def generate_markdown_report(
title: str,
sections: list,
metadata: Optional[Dict[str, Any]] = None,
output_path: str = "report.md"
def format_conversation_history(
messages: List[Dict[str, Any]],
include_metadata: bool = True,
include_code: bool = True,
include_timestamps: bool = False
) -> str:
"""
Generate a formatted markdown report.
Format conversation history into structured markdown.
Args:
title: Report title
sections: List of dicts with 'heading' and 'content' keys
metadata: Optional metadata dict (author, date, etc.)
output_path: Path to save markdown file
messages: List of conversation message dictionaries
include_metadata: Include metadata section
include_code: Include code blocks
include_timestamps: Include message timestamps
Returns:
Path to generated markdown file
Formatted markdown string
"""
md_content = []
# Title
md_content.append(f"# {title}\n")
# Metadata
if metadata:
md_content.append("---\n")
for key, value in metadata.items():
md_content.append(f"**{key}:** {value} \n")
md_content.append("---\n\n")
# Sections
for section in sections:
heading = section.get('heading', 'Section')
content = section.get('content', '')
level = section.get('level', 2) # Default to h2
md_content.append(f"{'#' * level} {heading}\n\n")
md_content.append(f"{content}\n\n")
# Write to file
output = Path(output_path)
output.write_text('\n'.join(md_content))
return str(output)
def convert_to_pdf_weasyprint(
    markdown_path: str,
    output_path: str,
    css_style: Optional[str] = None
) -> bool:
    """
    Render a markdown file to PDF via the ``markdown`` and WeasyPrint packages.

    The markdown text is converted to an HTML fragment, embedded in a page
    that carries a built-in stylesheet, and written out as a PDF.

    Args:
        markdown_path: Markdown file to read
        output_path: Destination of the generated PDF
        css_style: Path of an extra stylesheet; applied only if the file exists

    Returns:
        True when the PDF was written. False when a required package cannot
        be imported or any other error occurs (a message is printed).
    """
    try:
        # Imported lazily so a missing optional dependency is reported to the
        # user instead of breaking the import of this module.
        import markdown
        from weasyprint import HTML, CSS

        source_text = Path(markdown_path).read_text()

        html_content = markdown.markdown(
            source_text,
            extensions=['tables', 'fenced_code', 'codehilite']
        )

        # Full page around the converted fragment (doubled braces are literal
        # CSS braces inside the f-string).
        html_template = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Biomni Report</title>
<style>
body {{
font-family: 'Helvetica', 'Arial', sans-serif;
line-height: 1.6;
color: #333;
max-width: 800px;
margin: 40px auto;
padding: 20px;
}}
h1 {{
color: #2c3e50;
border-bottom: 3px solid #3498db;
padding-bottom: 10px;
}}
h2 {{
color: #34495e;
margin-top: 30px;
border-bottom: 1px solid #bdc3c7;
padding-bottom: 5px;
}}
h3 {{
color: #7f8c8d;
}}
code {{
background-color: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', monospace;
}}
pre {{
background-color: #f4f4f4;
padding: 15px;
border-radius: 5px;
overflow-x: auto;
}}
table {{
border-collapse: collapse;
width: 100%;
margin: 20px 0;
}}
th, td {{
border: 1px solid #ddd;
padding: 12px;
text-align: left;
}}
th {{
background-color: #3498db;
color: white;
}}
tr:nth-child(even) {{
background-color: #f9f9f9;
}}
.metadata {{
background-color: #ecf0f1;
padding: 15px;
border-radius: 5px;
margin: 20px 0;
}}
</style>
</head>
<body>
{html_content}
</body>
</html>
"""

        document = HTML(string=html_template)

        # The optional user stylesheet is skipped when the path does not exist.
        use_custom_css = bool(css_style) and Path(css_style).exists()
        extra_styles = [CSS(filename=css_style)] if use_custom_css else []

        document.write_pdf(output_path, stylesheets=extra_styles)
        return True

    except ImportError:
        print("Error: WeasyPrint not installed. Install with: pip install weasyprint")
        return False
    except Exception as e:
        print(f"Error generating PDF: {e}")
        return False
def convert_to_pdf_pandoc(markdown_path: str, output_path: str) -> bool:
    """
    Render a markdown file to PDF by invoking the ``pandoc`` executable.

    Args:
        markdown_path: Markdown file handed to pandoc
        output_path: Destination of the generated PDF

    Returns:
        True when pandoc exits with status 0, False otherwise (pandoc
        missing, conversion error, or any other exception; a message is
        printed in each failure case).
    """
    try:
        import subprocess

        # Probe first so a broken installation yields a clear message.
        probe = subprocess.run(
            ['pandoc', '--version'],
            capture_output=True,
            text=True
        )
        if probe.returncode != 0:
            print("Error: Pandoc not installed")
            return False

        command = [
            'pandoc',
            markdown_path,
            '-o', output_path,
            '--pdf-engine=pdflatex',
            '-V', 'geometry:margin=1in',
            '--toc'
        ]
        conversion = subprocess.run(command, capture_output=True, text=True)

        if conversion.returncode == 0:
            return True

        print(f"Pandoc error: {conversion.stderr}")
        return False

    except FileNotFoundError:
        # Raised by subprocess.run when the pandoc executable is not on PATH.
        print("Error: Pandoc not found. Install from https://pandoc.org/")
        return False
    except Exception as e:
        print(f"Error: {e}")
        return False
def create_biomni_report(
conversation_history: list,
output_path: str = "biomni_report.pdf",
method: str = "weasyprint"
) -> bool:
"""
Create a formatted PDF report from Biomni conversation history.
Args:
conversation_history: List of conversation turns
output_path: Output PDF path
method: Conversion method ('weasyprint' or 'pandoc')
Returns:
True if successful
"""
# Prepare report sections
metadata = {
'Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
'Tool': 'Biomni AI Agent',
'Report Type': 'Analysis Summary'
}
sections = []
# Executive Summary
sections.append({
'heading': 'Executive Summary',
'level': 2,
'content': 'This report contains the complete analysis workflow executed by the Biomni biomedical AI agent.'
})
# Header
sections.append("# Biomni Analysis Report\n")
# Conversation history
for i, turn in enumerate(conversation_history, 1):
sections.append({
'heading': f'Task {i}: {turn.get("task", "Analysis")}',
'level': 2,
'content': f'**Input:**\n```\n{turn.get("input", "")}\n```\n\n**Output:**\n{turn.get("output", "")}'
})
# Metadata
if include_metadata:
sections.append("## Metadata\n")
sections.append(f"- **Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
sections.append(f"- **Number of interactions**: {len(messages)}")
sections.append("\n---\n")
# Process messages
sections.append("## Analysis\n")
for i, msg in enumerate(messages, 1):
role = msg.get('role', 'unknown')
content = msg.get('content', '')
if role == 'user':
sections.append(f"### Task {i // 2 + 1}\n")
sections.append(f"**Query:**\n```\n{content}\n```\n")
elif role == 'assistant':
sections.append(f"**Response:**\n")
# Check if content contains code
if include_code and ('```' in content or 'import ' in content):
# Attempt to separate text and code
parts = content.split('```')
for j, part in enumerate(parts):
if j % 2 == 0:
# Text content
if part.strip():
sections.append(f"{part.strip()}\n")
else:
# Code content
# Check if language is specified
lines = part.split('\n', 1)
if len(lines) > 1 and lines[0].strip() in ['python', 'r', 'bash', 'sql']:
lang = lines[0].strip()
code = lines[1]
else:
lang = 'python' # Default to python
code = part
sections.append(f"```{lang}\n{code}\n```\n")
else:
sections.append(f"{content}\n")
sections.append("\n---\n")
return '\n'.join(sections)
def markdown_to_html(markdown_content: str, title: str = "Biomni Report") -> str:
    """
    Convert markdown to a complete, styled HTML page.

    The markdown body is translated by ``markdown_to_html_simple`` and placed
    inside a page template with an embedded stylesheet and a footer.

    Args:
        markdown_content: Markdown string
        title: HTML page title; HTML-escaped before being inserted so that
            characters such as ``<`` or ``&`` cannot break the page markup

    Returns:
        HTML string
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # The title may come straight from user input (e.g. a CLI argument), so it
    # must not be interpolated into the markup verbatim.
    safe_title = escape(title)

    # Simple markdown to HTML conversion
    # For production use, consider using a library like markdown or mistune
    html_template = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{safe_title}</title>
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
line-height: 1.6;
max-width: 900px;
margin: 0 auto;
padding: 20px;
color: #333;
}}
h1 {{
color: #2c3e50;
border-bottom: 3px solid #3498db;
padding-bottom: 10px;
}}
h2 {{
color: #34495e;
margin-top: 30px;
border-bottom: 2px solid #95a5a6;
padding-bottom: 5px;
}}
h3 {{
color: #555;
}}
code {{
background-color: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Monaco', 'Menlo', 'Courier New', monospace;
}}
pre {{
background-color: #f8f8f8;
border: 1px solid #ddd;
border-radius: 5px;
padding: 15px;
overflow-x: auto;
}}
pre code {{
background-color: transparent;
padding: 0;
}}
hr {{
border: none;
border-top: 1px solid #ddd;
margin: 30px 0;
}}
.metadata {{
background-color: #ecf0f1;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}}
.task {{
background-color: #e8f4f8;
padding: 10px;
border-left: 4px solid #3498db;
margin: 20px 0;
}}
.footer {{
margin-top: 50px;
text-align: center;
color: #7f8c8d;
font-size: 0.9em;
}}
</style>
</head>
<body>
<div class="content">
{markdown_to_html_simple(markdown_content)}
</div>
<div class="footer">
<p>Generated with Biomni | Stanford SNAP Lab</p>
<p><a href="https://github.com/snap-stanford/biomni">github.com/snap-stanford/biomni</a></p>
</div>
</body>
</html>
"""
    return html_template
def markdown_to_html_simple(md: str) -> str:
    """Convert a small markdown subset to HTML (basic implementation).

    Supported constructs: fenced code blocks, ``#`` / ``##`` / ``###``
    headings, ``- `` bullet lists, ``---`` horizontal rules, ``**bold**``
    spans and plain paragraphs. Blank lines become ``<br>``.

    Behaviour worth knowing:
    - All text, including code, is HTML-escaped, so ``<``, ``>`` and ``&``
      in the input cannot corrupt the generated markup.
    - An open ``<ul>`` is closed before any non-list element (heading, code
      fence, rule, paragraph).
    - Every complete ``**...**`` pair is converted, also inside list items
      and headings; an unpaired ``**`` is left as literal text.
    - A code block still open at the end of the input is closed.

    Args:
        md: Markdown string

    Returns:
        HTML fragment, one element per line, joined with newlines
    """
    # Function-scope imports keep this block self-contained.
    import re
    from html import escape

    bold_pattern = re.compile(r'\*\*(.+?)\*\*')

    def render_inline(text: str) -> str:
        # Escape first so the <strong> tags added afterwards stay real tags.
        return bold_pattern.sub(r'<strong>\1</strong>', escape(text, quote=False))

    html_lines = []
    in_code_block = False
    in_list = False

    for line in md.split('\n'):
        # Code fences toggle code mode; the fence line itself emits the tags.
        if line.startswith('```'):
            if in_code_block:
                html_lines.append('</code></pre>')
                in_code_block = False
            else:
                if in_list:
                    html_lines.append('</ul>')
                    in_list = False
                lang = line[3:].strip()
                if lang:
                    html_lines.append(f'<pre><code class="language-{escape(lang)}">')
                else:
                    html_lines.append('<pre><code>')
                in_code_block = True
            continue

        if in_code_block:
            # Code is emitted verbatim apart from escaping HTML metacharacters.
            html_lines.append(escape(line, quote=False))
            continue

        # List items
        if line.startswith('- '):
            if not in_list:
                html_lines.append('<ul>')
                in_list = True
            html_lines.append(f'<li>{render_inline(line[2:])}</li>')
            continue

        # Anything else ends a running list.
        if in_list:
            html_lines.append('</ul>')
            in_list = False

        if line.startswith('# '):
            html_lines.append(f'<h1>{render_inline(line[2:])}</h1>')
        elif line.startswith('## '):
            html_lines.append(f'<h2>{render_inline(line[3:])}</h2>')
        elif line.startswith('### '):
            html_lines.append(f'<h3>{render_inline(line[4:])}</h3>')
        elif line.strip() == '---':
            html_lines.append('<hr>')
        elif line.strip():
            html_lines.append(f'<p>{render_inline(line)}</p>')
        else:
            html_lines.append('<br>')

    # Close whatever is still open so the fragment is well-formed.
    if in_code_block:
        html_lines.append('</code></pre>')
    if in_list:
        html_lines.append('</ul>')

    return '\n'.join(html_lines)
def generate_report(
conversation_data: Dict[str, Any],
output_path: Path,
format: str = 'markdown',
title: Optional[str] = None
):
"""
Generate formatted report from conversation data.
Args:
conversation_data: Conversation history dictionary
output_path: Output file path
format: Output format ('markdown', 'html', or 'pdf')
title: Report title
"""
messages = conversation_data.get('messages', [])
if not title:
title = f"Biomni Analysis - {datetime.now().strftime('%Y-%m-%d')}"
# Generate markdown
md_path = output_path.replace('.pdf', '.md')
generate_markdown_report(
title="Biomni Analysis Report",
sections=sections,
metadata=metadata,
output_path=md_path
)
markdown_content = format_conversation_history(messages)
if format == 'markdown':
output_path.write_text(markdown_content)
print(f"✓ Markdown report saved to {output_path}")
elif format == 'html':
html_content = markdown_to_html(markdown_content, title)
output_path.write_text(html_content)
print(f"✓ HTML report saved to {output_path}")
elif format == 'pdf':
# For PDF generation, we'd typically use a library like weasyprint or reportlab
# This is a placeholder implementation
print("PDF generation requires additional dependencies (weasyprint or reportlab)")
print("Falling back to HTML format...")
html_path = output_path.with_suffix('.html')
html_content = markdown_to_html(markdown_content, title)
html_path.write_text(html_content)
print(f"✓ HTML report saved to {html_path}")
print(" To convert to PDF:")
print(f" 1. Install weasyprint: pip install weasyprint")
print(f" 2. Run: weasyprint {html_path} {output_path}")
# Convert to PDF
if method == 'weasyprint':
success = convert_to_pdf_weasyprint(md_path, output_path)
elif method == 'pandoc':
success = convert_to_pdf_pandoc(md_path, output_path)
else:
print(f"Unknown method: {method}")
return False
if success:
print(f"✓ Report generated: {output_path}")
print(f" Markdown: {md_path}")
else:
print("✗ Failed to generate PDF")
print(f" Markdown available: {md_path}")
return success
raise ValueError(f"Unsupported format: {format}")
def main():
"""CLI for report generation."""
"""Main entry point for CLI usage."""
parser = argparse.ArgumentParser(
description='Generate formatted PDF reports for Biomni analyses'
description="Generate enhanced reports from biomni conversation histories"
)
parser.add_argument(
'input',
type=str,
help='Input markdown file or conversation history'
'--input',
type=Path,
required=True,
help='Input conversation history JSON file'
)
parser.add_argument(
'-o', '--output',
type=str,
default='biomni_report.pdf',
help='Output PDF path (default: biomni_report.pdf)'
'--output',
type=Path,
required=True,
help='Output report file path'
)
parser.add_argument(
'-m', '--method',
type=str,
choices=['weasyprint', 'pandoc'],
default='weasyprint',
help='Conversion method (default: weasyprint)'
'--format',
choices=['markdown', 'html', 'pdf'],
default='markdown',
help='Output format (default: markdown)'
)
parser.add_argument(
'--css',
'--title',
type=str,
help='Custom CSS stylesheet path'
help='Report title (optional)'
)
args = parser.parse_args()
# Check if input is markdown or conversation history
input_path = Path(args.input)
if not input_path.exists():
print(f"Error: Input file not found: {args.input}")
return 1
# If input is markdown, convert directly
if input_path.suffix == '.md':
if args.method == 'weasyprint':
success = convert_to_pdf_weasyprint(
str(input_path),
args.output,
args.css
)
else:
success = convert_to_pdf_pandoc(str(input_path), args.output)
return 0 if success else 1
# Otherwise, assume it's conversation history (JSON)
# Load conversation data
try:
import json
with open(input_path) as f:
history = json.load(f)
success = create_biomni_report(
history,
args.output,
args.method
)
return 0 if success else 1
with open(args.input, 'r') as f:
conversation_data = json.load(f)
except FileNotFoundError:
print(f"❌ Input file not found: {args.input}")
return 1
except json.JSONDecodeError:
print("Error: Input file is not valid JSON or markdown")
print(f"❌ Invalid JSON in input file: {args.input}")
return 1
# Generate report
try:
generate_report(
conversation_data,
args.output,
format=args.format,
title=args.title
)
return 0
except Exception as e:
print(f"❌ Error generating report: {e}")
return 1
if __name__ == "__main__":
if __name__ == '__main__':
import sys
sys.exit(main())

457
scientific-packages/biomni/scripts/setup_environment.py Normal file → Executable file
View File

@@ -1,230 +1,355 @@
#!/usr/bin/env python3
"""
Biomni Environment Setup and Validation Script
Interactive setup script for biomni environment configuration.
This script helps users set up and validate their Biomni environment,
including checking dependencies, API keys, and data availability.
This script helps users set up:
1. Conda environment with required dependencies
2. API keys for LLM providers
3. Data lake directory configuration
4. MCP server setup (optional)
Usage:
python setup_environment.py
"""
import os
import sys
import subprocess
from pathlib import Path
from typing import Dict, List, Tuple
from typing import Dict, Optional
def check_python_version() -> Tuple[bool, str]:
    """Report whether the running interpreter is Python 3 with minor version >= 8.

    Returns:
        ``(True, "Python X.Y.Z")`` when compatible, otherwise
        ``(False, "Python X.Y - requires Python 3.8+")``.
    """
    major, minor, micro = sys.version_info[:3]
    if major != 3 or minor < 8:
        return False, f"Python {major}.{minor} - requires Python 3.8+"
    return True, f"Python {major}.{minor}.{micro}"
def check_conda_env() -> Tuple[bool, str]:
    """Report whether the active conda environment is ``biomni_e1``.

    The environment name is read from the ``CONDA_DEFAULT_ENV`` variable;
    when the variable is unset the reported current environment is ``None``.

    Returns:
        ``(is_biomni_env, human-readable status message)``.
    """
    active_env = os.environ.get('CONDA_DEFAULT_ENV', None)
    in_biomni_env = active_env == 'biomni_e1'
    if in_biomni_env:
        status = f"Conda environment: {active_env}"
    else:
        status = f"Not in biomni_e1 environment (current: {active_env})"
    return in_biomni_env, status
def check_package_installed(package: str) -> bool:
"""Check if a Python package is installed."""
def check_conda_installed() -> bool:
"""Check if conda is available in the system."""
try:
__import__(package)
subprocess.run(
['conda', '--version'],
capture_output=True,
check=True
)
return True
except ImportError:
except (subprocess.CalledProcessError, FileNotFoundError):
return False
def check_dependencies() -> Tuple[bool, List[str]]:
"""Check for required and optional dependencies."""
required = ['biomni']
optional = ['weasyprint', 'markdown2pdf']
def setup_conda_environment():
"""Guide user through conda environment setup."""
print("\n=== Conda Environment Setup ===")
missing_required = [pkg for pkg in required if not check_package_installed(pkg)]
missing_optional = [pkg for pkg in optional if not check_package_installed(pkg)]
if not check_conda_installed():
print("❌ Conda not found. Please install Miniconda or Anaconda:")
print(" https://docs.conda.io/en/latest/miniconda.html")
return False
messages = []
success = len(missing_required) == 0
print("✓ Conda is installed")
if missing_required:
messages.append(f"Missing required packages: {', '.join(missing_required)}")
messages.append("Install with: pip install biomni --upgrade")
# Check if biomni_e1 environment exists
result = subprocess.run(
['conda', 'env', 'list'],
capture_output=True,
text=True
)
if 'biomni_e1' in result.stdout:
print("✓ biomni_e1 environment already exists")
return True
print("\nCreating biomni_e1 conda environment...")
print("This will install Python 3.10 and required dependencies.")
response = input("Proceed? [y/N]: ").strip().lower()
if response != 'y':
print("Skipping conda environment setup")
return False
try:
# Create conda environment
subprocess.run(
['conda', 'create', '-n', 'biomni_e1', 'python=3.10', '-y'],
check=True
)
print("\n✓ Conda environment created successfully")
print("\nTo activate: conda activate biomni_e1")
print("Then install biomni: pip install biomni --upgrade")
return True
except subprocess.CalledProcessError as e:
print(f"❌ Failed to create conda environment: {e}")
return False
def setup_api_keys() -> Dict[str, str]:
    """Prompt for the API key of each supported LLM provider.

    Every provider is optional: pressing Enter at a prompt skips it. Entered
    values are stripped of surrounding whitespace.

    Returns:
        Mapping of environment-variable name to the key that was entered;
        empty when every prompt was skipped (a warning is printed then).
    """
    print("\n=== API Key Configuration ===")
    print("Biomni supports multiple LLM providers.")
    print("At minimum, configure one provider.")

    # (menu heading, where to obtain a key, environment variable name)
    providers = (
        ("1. Anthropic Claude (Recommended)", "https://console.anthropic.com/", 'ANTHROPIC_API_KEY'),
        ("2. OpenAI", "https://platform.openai.com/api-keys", 'OPENAI_API_KEY'),
        ("3. Google Gemini", "https://makersuite.google.com/app/apikey", 'GOOGLE_API_KEY'),
        ("4. Groq", "https://console.groq.com/keys", 'GROQ_API_KEY'),
    )

    api_keys = {}
    for heading, key_url, env_name in providers:
        print(f"\n{heading}")
        print(f" Get your API key from: {key_url}")
        entered = input(f" Enter {env_name} (or press Enter to skip): ").strip()
        if entered:
            api_keys[env_name] = entered

    if not api_keys:
        print("\n⚠️ No API keys configured. You'll need at least one to use biomni.")

    return api_keys
def save_api_keys(api_keys: Dict[str, str], method: str = 'env_file'):
    """Save API keys using the specified method.

    Args:
        api_keys: Mapping of environment-variable name to API key
        method: ``'env_file'`` merges the keys into ``.env`` in the current
            working directory (existing ``KEY=value`` entries are kept, new
            values win on conflict; comment lines of the old file are not
            carried over). ``'shell_export'`` only prints ``export`` lines
            for the user to add to their shell configuration. Any other
            value does nothing.

    The ``.env`` file contains secrets, so it is created with owner-only
    permissions (0o600) and an already existing file is tightened to the
    same mode.
    """
    if method == 'env_file':
        env_file = Path.cwd() / '.env'

        # Read existing .env if present
        existing_vars = {}
        if env_file.exists():
            with open(env_file, 'r') as f:
                for line in f:
                    line = line.strip()
                    if line and not line.startswith('#') and '=' in line:
                        key, val = line.split('=', 1)
                        existing_vars[key.strip()] = val.strip()

        # Update with new keys
        existing_vars.update(api_keys)

        # Create the file owner-read/write only; the mode passed to os.open
        # applies only when the file is newly created.
        fd = os.open(env_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as f:
            f.write("# Biomni API Keys\n")
            f.write("# Generated by setup_environment.py\n\n")
            for key, value in existing_vars.items():
                f.write(f"{key}={value}\n")

        # Tighten a pre-existing file too; the keys are already saved, so a
        # failure here is reported rather than raised.
        try:
            os.chmod(env_file, 0o600)
        except OSError as e:
            print(f"Warning: could not restrict permissions on {env_file}: {e}")

        print(f"\n✓ API keys saved to {env_file}")
        print(" Keys will be loaded automatically when biomni runs in this directory")

    elif method == 'shell_export':
        shell_file = Path.home() / '.bashrc'  # or .zshrc for zsh users
        print("\n📋 Add these lines to your shell configuration:")
        for key, value in api_keys.items():
            print(f" export {key}=\"{value}\"")
        print(f"\nThen run: source {shell_file}")
def setup_data_directory() -> Optional[Path]:
"""Configure biomni data lake directory."""
print("\n=== Data Lake Configuration ===")
print("Biomni requires ~11GB for integrated biomedical databases.")
default_path = Path.cwd() / 'biomni_data'
print(f"\nDefault location: {default_path}")
response = input("Use default location? [Y/n]: ").strip().lower()
if response == 'n':
custom_path = input("Enter custom path: ").strip()
data_path = Path(custom_path).expanduser().resolve()
else:
messages.append("Required packages: ✓")
data_path = default_path
if missing_optional:
messages.append(f"Missing optional packages: {', '.join(missing_optional)}")
messages.append("For PDF reports, install: pip install weasyprint")
# Create directory if it doesn't exist
data_path.mkdir(parents=True, exist_ok=True)
return success, messages
print(f"\n✓ Data directory configured: {data_path}")
print(" Data will be downloaded automatically on first use")
return data_path
def check_api_keys() -> Tuple[bool, Dict[str, bool]]:
"""Check which API keys are configured."""
api_keys = {
'ANTHROPIC_API_KEY': os.environ.get('ANTHROPIC_API_KEY'),
'OPENAI_API_KEY': os.environ.get('OPENAI_API_KEY'),
'GEMINI_API_KEY': os.environ.get('GEMINI_API_KEY'),
'GROQ_API_KEY': os.environ.get('GROQ_API_KEY'),
}
def test_installation(data_path: Path):
"""Test biomni installation with a simple query."""
print("\n=== Installation Test ===")
print("Testing biomni installation with a simple query...")
configured = {key: bool(value) for key, value in api_keys.items()}
has_any = any(configured.values())
response = input("Run test? [Y/n]: ").strip().lower()
if response == 'n':
print("Skipping test")
return
return has_any, configured
test_code = f'''
import os
from biomni.agent import A1
# Use environment variables for API keys
agent = A1(path='{data_path}', llm='claude-sonnet-4-20250514')
def check_data_directory(data_path: str = './data') -> Tuple[bool, str]:
"""Check if Biomni data directory exists and has content."""
path = Path(data_path)
# Simple test query
result = agent.go("What is the primary function of the TP53 gene?")
print("Test result:", result)
'''
if not path.exists():
return False, f"Data directory not found at {data_path}"
test_file = Path('test_biomni.py')
with open(test_file, 'w') as f:
f.write(test_code)
# Check if directory has files (data has been downloaded)
files = list(path.glob('*'))
if len(files) == 0:
return False, f"Data directory exists but is empty. Run agent once to download."
print(f"\nTest script created: {test_file}")
print("Running test...")
# Rough size check (should be ~11GB)
total_size = sum(f.stat().st_size for f in path.rglob('*') if f.is_file())
size_gb = total_size / (1024**3)
if size_gb < 1:
return False, f"Data directory exists but seems incomplete ({size_gb:.1f} GB)"
return True, f"Data directory: {data_path} ({size_gb:.1f} GB) ✓"
def check_disk_space(required_gb: float = 20) -> Tuple[bool, str]:
    """Compare free space on the current directory's filesystem with a requirement.

    Args:
        required_gb: Minimum free space, in GiB (1024**3 bytes)

    Returns:
        ``(enough_space, message)``; ``(False, "Could not check disk space: ...")``
        when the free space cannot be determined.
    """
    try:
        import shutil

        free_gb = shutil.disk_usage('.').free / (1024**3)
        enough = free_gb >= required_gb
        message = (
            f"Disk space: {free_gb:.1f} GB available ✓"
            if enough
            else f"Low disk space: {free_gb:.1f} GB (need {required_gb} GB)"
        )
        return enough, message
    except Exception as e:
        return False, f"Could not check disk space: {e}"
subprocess.run([sys.executable, str(test_file)], check=True)
print("\n✓ Test completed successfully!")
test_file.unlink() # Clean up test file
except subprocess.CalledProcessError:
print("\n❌ Test failed. Check your configuration.")
print(f" Test script saved as {test_file} for debugging")
def test_biomni_import() -> Tuple[bool, str]:
    """Check that the Biomni agent and configuration modules can be imported.

    Returns:
        ``(True, success message)`` when ``biomni.agent.A1`` and
        ``biomni.config.default_config`` import cleanly, otherwise
        ``(False, message describing the import or other error)``.
    """
    try:
        # Imported only to prove they resolve; the names are not used.
        from biomni.agent import A1
        from biomni.config import default_config
    except ImportError as e:
        return False, f"Cannot import Biomni: {e}"
    except Exception as e:
        return False, f"Biomni import error: {e}"
    else:
        return True, "Biomni import successful ✓"
def generate_example_script(data_path: Path):
"""Generate example usage script."""
example_code = f'''#!/usr/bin/env python3
"""
Example biomni usage script
This demonstrates basic biomni usage patterns.
Modify this script for your research tasks.
"""
def suggest_fixes(results: Dict[str, Tuple[bool, any]]) -> List[str]:
"""Generate suggestions for fixing issues."""
suggestions = []
from biomni.agent import A1
if not results['python'][0]:
suggestions.append("➜ Upgrade Python to 3.8 or higher")
# Initialize agent
agent = A1(
path='{data_path}',
llm='claude-sonnet-4-20250514' # or your preferred LLM
)
if not results['conda'][0]:
suggestions.append("➜ Activate biomni environment: conda activate biomni_e1")
# Example 1: Simple gene query
print("Example 1: Gene function query")
result = agent.go("""
What are the main functions of the BRCA1 gene?
Include information about:
- Molecular function
- Associated diseases
- Protein interactions
""")
print(result)
print("-" * 80)
if not results['dependencies'][0]:
suggestions.append("➜ Install Biomni: pip install biomni --upgrade")
# Example 2: Data analysis
print("\\nExample 2: GWAS analysis")
result = agent.go("""
Explain how to analyze GWAS summary statistics for:
1. Identifying genome-wide significant variants
2. Mapping variants to genes
3. Pathway enrichment analysis
""")
print(result)
if not results['api_keys'][0]:
suggestions.append("➜ Set API key: export ANTHROPIC_API_KEY='your-key'")
suggestions.append(" Or create .env file with API keys")
# Save conversation history
agent.save_conversation_history("example_results.pdf")
print("\\nResults saved to example_results.pdf")
'''
if not results['data'][0]:
suggestions.append("➜ Data will auto-download on first agent.go() call")
example_file = Path('example_biomni_usage.py')
with open(example_file, 'w') as f:
f.write(example_code)
if not results['disk_space'][0]:
suggestions.append("➜ Free up disk space (need ~20GB total)")
return suggestions
print(f"\n✓ Example script created: {example_file}")
def main():
"""Run all environment checks and display results."""
"""Main setup workflow."""
print("=" * 60)
print("Biomni Environment Validation")
print("Biomni Environment Setup")
print("=" * 60)
print()
# Run all checks
results = {}
# Step 1: Conda environment
conda_success = setup_conda_environment()
print("Checking Python version...")
results['python'] = check_python_version()
print(f" {results['python'][1]}")
print()
if conda_success:
print("\n⚠️ Remember to activate the environment:")
print(" conda activate biomni_e1")
print(" pip install biomni --upgrade")
print("Checking conda environment...")
results['conda'] = check_conda_env()
print(f" {results['conda'][1]}")
print()
# Step 2: API keys
api_keys = setup_api_keys()
print("Checking dependencies...")
results['dependencies'] = check_dependencies()
for msg in results['dependencies'][1]:
print(f" {msg}")
print()
if api_keys:
print("\nHow would you like to store API keys?")
print("1. .env file (recommended, local to this directory)")
print("2. Shell export (add to .bashrc/.zshrc)")
print("Checking API keys...")
results['api_keys'] = check_api_keys()
has_keys, key_status = results['api_keys']
for key, configured in key_status.items():
status = "" if configured else ""
print(f" {key}: {status}")
print()
choice = input("Choose [1/2]: ").strip()
print("Checking Biomni data directory...")
results['data'] = check_data_directory()
print(f" {results['data'][1]}")
print()
if choice == '2':
save_api_keys(api_keys, method='shell_export')
else:
save_api_keys(api_keys, method='env_file')
print("Checking disk space...")
results['disk_space'] = check_disk_space()
print(f" {results['disk_space'][1]}")
print()
# Step 3: Data directory
data_path = setup_data_directory()
print("Testing Biomni import...")
results['biomni_import'] = test_biomni_import()
print(f" {results['biomni_import'][1]}")
print()
# Step 4: Generate example script
if data_path:
generate_example_script(data_path)
# Step 5: Test installation (optional)
if api_keys and data_path:
test_installation(data_path)
# Summary
print("\n" + "=" * 60)
print("Setup Complete!")
print("=" * 60)
all_passed = all(result[0] for result in results.values())
if all_passed:
print("All checks passed! Environment is ready.")
print()
print("Quick start:")
print(" from biomni.agent import A1")
print(" agent = A1(path='./data', llm='claude-sonnet-4-20250514')")
print(" agent.go('Your biomedical task')")
if conda_success:
print("Conda environment: biomni_e1")
if api_keys:
print(f"✓ API keys configured: {', '.join(api_keys.keys())}")
if data_path:
print(f"✓ Data directory: {data_path}")
print("\nNext steps:")
if conda_success:
print("1. conda activate biomni_e1")
print("2. pip install biomni --upgrade")
print("3. Run example_biomni_usage.py to test")
else:
print("⚠ Some checks failed. See suggestions below:")
print()
suggestions = suggest_fixes(results)
for suggestion in suggestions:
print(suggestion)
print("1. Install conda/miniconda")
print("2. Run this script again")
print("=" * 60)
return 0 if all_passed else 1
print("\nFor documentation, see:")
print(" - GitHub: https://github.com/snap-stanford/biomni")
print(" - Paper: https://www.biorxiv.org/content/10.1101/2025.05.30.656746v1")
if __name__ == "__main__":
sys.exit(main())
try:
main()
except KeyboardInterrupt:
print("\n\nSetup interrupted by user")
sys.exit(1)
except Exception as e:
print(f"\n❌ Error during setup: {e}")
sys.exit(1)