mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
Improved Biomni support
This commit is contained in:
649
scientific-packages/biomni/scripts/generate_report.py
Normal file → Executable file
649
scientific-packages/biomni/scripts/generate_report.py
Normal file → Executable file
@@ -1,381 +1,370 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Enhanced PDF Report Generation for Biomni
|
||||
Enhanced PDF report generation for biomni conversation histories.
|
||||
|
||||
This script provides advanced PDF report generation with custom formatting,
|
||||
styling, and metadata for Biomni analysis results.
|
||||
This script provides additional customization options for biomni reports:
|
||||
- Custom styling and branding
|
||||
- Formatted code blocks
|
||||
- Section organization
|
||||
- Metadata inclusion
|
||||
- Export format options (PDF, HTML, Markdown)
|
||||
|
||||
Usage:
|
||||
python generate_report.py --input conversation.json --output report.pdf
|
||||
python generate_report.py --agent-object agent --output report.pdf --format html
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
|
||||
def generate_markdown_report(
|
||||
title: str,
|
||||
sections: list,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
output_path: str = "report.md"
|
||||
def format_conversation_history(
|
||||
messages: List[Dict[str, Any]],
|
||||
include_metadata: bool = True,
|
||||
include_code: bool = True,
|
||||
include_timestamps: bool = False
|
||||
) -> str:
|
||||
"""
|
||||
Generate a formatted markdown report.
|
||||
Format conversation history into structured markdown.
|
||||
|
||||
Args:
|
||||
title: Report title
|
||||
sections: List of dicts with 'heading' and 'content' keys
|
||||
metadata: Optional metadata dict (author, date, etc.)
|
||||
output_path: Path to save markdown file
|
||||
messages: List of conversation message dictionaries
|
||||
include_metadata: Include metadata section
|
||||
include_code: Include code blocks
|
||||
include_timestamps: Include message timestamps
|
||||
|
||||
Returns:
|
||||
Path to generated markdown file
|
||||
Formatted markdown string
|
||||
"""
|
||||
md_content = []
|
||||
|
||||
# Title
|
||||
md_content.append(f"# {title}\n")
|
||||
|
||||
# Metadata
|
||||
if metadata:
|
||||
md_content.append("---\n")
|
||||
for key, value in metadata.items():
|
||||
md_content.append(f"**{key}:** {value} \n")
|
||||
md_content.append("---\n\n")
|
||||
|
||||
# Sections
|
||||
for section in sections:
|
||||
heading = section.get('heading', 'Section')
|
||||
content = section.get('content', '')
|
||||
level = section.get('level', 2) # Default to h2
|
||||
|
||||
md_content.append(f"{'#' * level} {heading}\n\n")
|
||||
md_content.append(f"{content}\n\n")
|
||||
|
||||
# Write to file
|
||||
output = Path(output_path)
|
||||
output.write_text('\n'.join(md_content))
|
||||
|
||||
return str(output)
|
||||
|
||||
|
||||
def convert_to_pdf_weasyprint(
    markdown_path: str,
    output_path: str,
    css_style: Optional[str] = None
) -> bool:
    """
    Convert a markdown file to a styled PDF using Python-Markdown and WeasyPrint.

    Args:
        markdown_path: Path to the markdown file to read (decoded as UTF-8)
        output_path: Path for the output PDF
        css_style: Optional path to an extra CSS stylesheet; a path that does
            not exist is reported and skipped

    Returns:
        True if the PDF was written, False if a dependency is missing or any
        step of the conversion failed (the reason is printed)
    """
    try:
        import markdown
        from weasyprint import HTML, CSS

        # The generated page declares charset utf-8, so decode the input as
        # UTF-8 explicitly instead of relying on the platform default.
        with open(markdown_path, 'r', encoding='utf-8') as f:
            md_content = f.read()

        # Convert to HTML
        html_content = markdown.markdown(
            md_content,
            extensions=['tables', 'fenced_code', 'codehilite']
        )

        # Wrap in HTML template (CSS braces are doubled because this is an f-string)
        html_template = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Biomni Report</title>
<style>
body {{
font-family: 'Helvetica', 'Arial', sans-serif;
line-height: 1.6;
color: #333;
max-width: 800px;
margin: 40px auto;
padding: 20px;
}}
h1 {{
color: #2c3e50;
border-bottom: 3px solid #3498db;
padding-bottom: 10px;
}}
h2 {{
color: #34495e;
margin-top: 30px;
border-bottom: 1px solid #bdc3c7;
padding-bottom: 5px;
}}
h3 {{
color: #7f8c8d;
}}
code {{
background-color: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', monospace;
}}
pre {{
background-color: #f4f4f4;
padding: 15px;
border-radius: 5px;
overflow-x: auto;
}}
table {{
border-collapse: collapse;
width: 100%;
margin: 20px 0;
}}
th, td {{
border: 1px solid #ddd;
padding: 12px;
text-align: left;
}}
th {{
background-color: #3498db;
color: white;
}}
tr:nth-child(even) {{
background-color: #f9f9f9;
}}
.metadata {{
background-color: #ecf0f1;
padding: 15px;
border-radius: 5px;
margin: 20px 0;
}}
</style>
</head>
<body>
{html_content}
</body>
</html>
"""

        # Generate PDF
        pdf = HTML(string=html_template)

        # Add custom CSS if provided; tell the user when the path is unusable
        # rather than silently producing an unstyled report.
        stylesheets = []
        if css_style:
            if Path(css_style).exists():
                stylesheets.append(CSS(filename=css_style))
            else:
                print(f"Warning: CSS stylesheet not found, ignoring: {css_style}")

        pdf.write_pdf(output_path, stylesheets=stylesheets)

        return True

    except ImportError as e:
        # Either `markdown` or `weasyprint` may be the missing package.
        print(f"Error: required package not installed ({e}). "
              "Install with: pip install markdown weasyprint")
        return False
    except Exception as e:
        # Top-level boundary for the conversion: report and signal failure.
        print(f"Error generating PDF: {e}")
        return False
|
||||
|
||||
|
||||
def convert_to_pdf_pandoc(markdown_path: str, output_path: str) -> bool:
    """
    Render a markdown file to PDF by shelling out to Pandoc.

    Pandoc is first probed with ``--version``; the conversion itself uses the
    pdflatex engine, 1in margins and a table of contents.

    Args:
        markdown_path: Path to markdown file
        output_path: Path for output PDF

    Returns:
        True if Pandoc exited with status 0, False otherwise (the reason is
        printed)
    """
    try:
        import subprocess

        probe_cmd = ['pandoc', '--version']
        convert_cmd = [
            'pandoc',
            markdown_path,
            '-o', output_path,
            '--pdf-engine=pdflatex',
            '-V', 'geometry:margin=1in',
            '--toc'
        ]

        # Availability probe: a non-zero exit means pandoc is unusable.
        probe = subprocess.run(probe_cmd, capture_output=True, text=True)
        if probe.returncode != 0:
            print("Error: Pandoc not installed")
            return False

        # Actual conversion; stderr is surfaced on failure.
        conversion = subprocess.run(convert_cmd, capture_output=True, text=True)
        if conversion.returncode == 0:
            return True

        print(f"Pandoc error: {conversion.stderr}")
        return False

    except FileNotFoundError:
        # Raised by subprocess when the pandoc executable is not on PATH.
        print("Error: Pandoc not found. Install from https://pandoc.org/")
        return False
    except Exception as e:
        print(f"Error: {e}")
        return False
|
||||
|
||||
|
||||
def create_biomni_report(
|
||||
conversation_history: list,
|
||||
output_path: str = "biomni_report.pdf",
|
||||
method: str = "weasyprint"
|
||||
) -> bool:
|
||||
"""
|
||||
Create a formatted PDF report from Biomni conversation history.
|
||||
|
||||
Args:
|
||||
conversation_history: List of conversation turns
|
||||
output_path: Output PDF path
|
||||
method: Conversion method ('weasyprint' or 'pandoc')
|
||||
|
||||
Returns:
|
||||
True if successful
|
||||
"""
|
||||
# Prepare report sections
|
||||
metadata = {
|
||||
'Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
||||
'Tool': 'Biomni AI Agent',
|
||||
'Report Type': 'Analysis Summary'
|
||||
}
|
||||
|
||||
sections = []
|
||||
|
||||
# Executive Summary
|
||||
sections.append({
|
||||
'heading': 'Executive Summary',
|
||||
'level': 2,
|
||||
'content': 'This report contains the complete analysis workflow executed by the Biomni biomedical AI agent.'
|
||||
})
|
||||
# Header
|
||||
sections.append("# Biomni Analysis Report\n")
|
||||
|
||||
# Conversation history
|
||||
for i, turn in enumerate(conversation_history, 1):
|
||||
sections.append({
|
||||
'heading': f'Task {i}: {turn.get("task", "Analysis")}',
|
||||
'level': 2,
|
||||
'content': f'**Input:**\n```\n{turn.get("input", "")}\n```\n\n**Output:**\n{turn.get("output", "")}'
|
||||
})
|
||||
# Metadata
|
||||
if include_metadata:
|
||||
sections.append("## Metadata\n")
|
||||
sections.append(f"- **Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
||||
sections.append(f"- **Number of interactions**: {len(messages)}")
|
||||
sections.append("\n---\n")
|
||||
|
||||
# Process messages
|
||||
sections.append("## Analysis\n")
|
||||
|
||||
for i, msg in enumerate(messages, 1):
|
||||
role = msg.get('role', 'unknown')
|
||||
content = msg.get('content', '')
|
||||
|
||||
if role == 'user':
|
||||
sections.append(f"### Task {i // 2 + 1}\n")
|
||||
sections.append(f"**Query:**\n```\n{content}\n```\n")
|
||||
|
||||
elif role == 'assistant':
|
||||
sections.append(f"**Response:**\n")
|
||||
|
||||
# Check if content contains code
|
||||
if include_code and ('```' in content or 'import ' in content):
|
||||
# Attempt to separate text and code
|
||||
parts = content.split('```')
|
||||
for j, part in enumerate(parts):
|
||||
if j % 2 == 0:
|
||||
# Text content
|
||||
if part.strip():
|
||||
sections.append(f"{part.strip()}\n")
|
||||
else:
|
||||
# Code content
|
||||
# Check if language is specified
|
||||
lines = part.split('\n', 1)
|
||||
if len(lines) > 1 and lines[0].strip() in ['python', 'r', 'bash', 'sql']:
|
||||
lang = lines[0].strip()
|
||||
code = lines[1]
|
||||
else:
|
||||
lang = 'python' # Default to python
|
||||
code = part
|
||||
|
||||
sections.append(f"```{lang}\n{code}\n```\n")
|
||||
else:
|
||||
sections.append(f"{content}\n")
|
||||
|
||||
sections.append("\n---\n")
|
||||
|
||||
return '\n'.join(sections)
|
||||
|
||||
|
||||
def markdown_to_html(markdown_content: str, title: str = "Biomni Report") -> str:
    """
    Convert markdown to a complete, styled HTML document.

    The markdown body is rendered by ``markdown_to_html_simple`` and wrapped
    in a page template with embedded CSS and a Biomni footer.

    Args:
        markdown_content: Markdown string
        title: HTML page title; HTML-escaped before insertion into ``<title>``

    Returns:
        HTML string
    """
    from html import escape

    # Simple markdown to HTML conversion
    # For production use, consider using a library like markdown or mistune
    body_html = markdown_to_html_simple(markdown_content)

    # The title is free text supplied by the caller; escape it so characters
    # such as < or & cannot break out of the <title> element.
    safe_title = escape(title)

    # CSS braces are doubled because this is an f-string.
    html_template = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{safe_title}</title>
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
line-height: 1.6;
max-width: 900px;
margin: 0 auto;
padding: 20px;
color: #333;
}}
h1 {{
color: #2c3e50;
border-bottom: 3px solid #3498db;
padding-bottom: 10px;
}}
h2 {{
color: #34495e;
margin-top: 30px;
border-bottom: 2px solid #95a5a6;
padding-bottom: 5px;
}}
h3 {{
color: #555;
}}
code {{
background-color: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Monaco', 'Menlo', 'Courier New', monospace;
}}
pre {{
background-color: #f8f8f8;
border: 1px solid #ddd;
border-radius: 5px;
padding: 15px;
overflow-x: auto;
}}
pre code {{
background-color: transparent;
padding: 0;
}}
hr {{
border: none;
border-top: 1px solid #ddd;
margin: 30px 0;
}}
.metadata {{
background-color: #ecf0f1;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}}
.task {{
background-color: #e8f4f8;
padding: 10px;
border-left: 4px solid #3498db;
margin: 20px 0;
}}
.footer {{
margin-top: 50px;
text-align: center;
color: #7f8c8d;
font-size: 0.9em;
}}
</style>
</head>
<body>
<div class="content">
{body_html}
</div>
<div class="footer">
<p>Generated with Biomni | Stanford SNAP Lab</p>
<p><a href="https://github.com/snap-stanford/biomni">github.com/snap-stanford/biomni</a></p>
</div>
</body>
</html>
"""
    return html_template
|
||||
|
||||
|
||||
def markdown_to_html_simple(md: str) -> str:
    """
    Simple line-based markdown to HTML converter (basic implementation).

    Supported constructs: fenced code blocks (with optional language),
    ``#``/``##``/``###`` headings, ``- `` bullet lists, ``---`` horizontal
    rules, ``**bold**`` spans and plain paragraphs. Blank lines become ``<br>``.

    All text and code is HTML-escaped, every ``**bold**`` pair on a line is
    converted (including inside headings and list items), an open list is
    closed before any non-list element, and an unterminated code fence is
    closed at end of input.

    Args:
        md: Markdown source text

    Returns:
        HTML fragment (no ``<html>``/``<body>`` wrapper), one element per line
    """
    import re
    from html import escape

    bold_pattern = re.compile(r'\*\*(.+?)\*\*')
    # Longest prefix first so '### ' is not mistaken for '# '.
    headings = (('### ', 'h3'), ('## ', 'h2'), ('# ', 'h1'))

    def render_inline(text: str) -> str:
        # Escape first so literal <, > and & cannot inject markup, then turn
        # each **...** pair into <strong>...</strong>.
        return bold_pattern.sub(r'<strong>\1</strong>', escape(text, quote=False))

    html_lines = []
    in_code_block = False
    in_list = False

    for line in md.split('\n'):
        # Code fences toggle code-block mode
        if line.startswith('```'):
            if in_code_block:
                html_lines.append('</code></pre>')
                in_code_block = False
            else:
                # A code block cannot live inside an open <ul>.
                if in_list:
                    html_lines.append('</ul>')
                    in_list = False
                lang = line[3:].strip()
                if lang:
                    html_lines.append(f'<pre><code class="language-{escape(lang)}">')
                else:
                    html_lines.append('<pre><code>')
                in_code_block = True
            continue

        if in_code_block:
            # Code is emitted verbatim apart from HTML escaping.
            html_lines.append(escape(line, quote=False))
            continue

        # List items
        if line.startswith('- '):
            if not in_list:
                html_lines.append('<ul>')
                in_list = True
            html_lines.append(f'<li>{render_inline(line[2:])}</li>')
            continue

        # Any other line ends the current list.
        if in_list:
            html_lines.append('</ul>')
            in_list = False

        heading = next(((p, t) for p, t in headings if line.startswith(p)), None)
        if heading is not None:
            prefix, tag = heading
            html_lines.append(f'<{tag}>{render_inline(line[len(prefix):])}</{tag}>')
        elif line.strip() == '---':
            html_lines.append('<hr>')
        elif line.strip():
            html_lines.append(f'<p>{render_inline(line)}</p>')
        else:
            html_lines.append('<br>')

    # Close anything still open at end of input.
    if in_code_block:
        html_lines.append('</code></pre>')
    if in_list:
        html_lines.append('</ul>')

    return '\n'.join(html_lines)
|
||||
|
||||
|
||||
def generate_report(
|
||||
conversation_data: Dict[str, Any],
|
||||
output_path: Path,
|
||||
format: str = 'markdown',
|
||||
title: Optional[str] = None
|
||||
):
|
||||
"""
|
||||
Generate formatted report from conversation data.
|
||||
|
||||
Args:
|
||||
conversation_data: Conversation history dictionary
|
||||
output_path: Output file path
|
||||
format: Output format ('markdown', 'html', or 'pdf')
|
||||
title: Report title
|
||||
"""
|
||||
messages = conversation_data.get('messages', [])
|
||||
|
||||
if not title:
|
||||
title = f"Biomni Analysis - {datetime.now().strftime('%Y-%m-%d')}"
|
||||
|
||||
# Generate markdown
|
||||
md_path = output_path.replace('.pdf', '.md')
|
||||
generate_markdown_report(
|
||||
title="Biomni Analysis Report",
|
||||
sections=sections,
|
||||
metadata=metadata,
|
||||
output_path=md_path
|
||||
)
|
||||
markdown_content = format_conversation_history(messages)
|
||||
|
||||
if format == 'markdown':
|
||||
output_path.write_text(markdown_content)
|
||||
print(f"✓ Markdown report saved to {output_path}")
|
||||
|
||||
elif format == 'html':
|
||||
html_content = markdown_to_html(markdown_content, title)
|
||||
output_path.write_text(html_content)
|
||||
print(f"✓ HTML report saved to {output_path}")
|
||||
|
||||
elif format == 'pdf':
|
||||
# For PDF generation, we'd typically use a library like weasyprint or reportlab
|
||||
# This is a placeholder implementation
|
||||
print("PDF generation requires additional dependencies (weasyprint or reportlab)")
|
||||
print("Falling back to HTML format...")
|
||||
|
||||
html_path = output_path.with_suffix('.html')
|
||||
html_content = markdown_to_html(markdown_content, title)
|
||||
html_path.write_text(html_content)
|
||||
|
||||
print(f"✓ HTML report saved to {html_path}")
|
||||
print(" To convert to PDF:")
|
||||
print(f" 1. Install weasyprint: pip install weasyprint")
|
||||
print(f" 2. Run: weasyprint {html_path} {output_path}")
|
||||
|
||||
# Convert to PDF
|
||||
if method == 'weasyprint':
|
||||
success = convert_to_pdf_weasyprint(md_path, output_path)
|
||||
elif method == 'pandoc':
|
||||
success = convert_to_pdf_pandoc(md_path, output_path)
|
||||
else:
|
||||
print(f"Unknown method: {method}")
|
||||
return False
|
||||
|
||||
if success:
|
||||
print(f"✓ Report generated: {output_path}")
|
||||
print(f" Markdown: {md_path}")
|
||||
else:
|
||||
print("✗ Failed to generate PDF")
|
||||
print(f" Markdown available: {md_path}")
|
||||
|
||||
return success
|
||||
raise ValueError(f"Unsupported format: {format}")
|
||||
|
||||
|
||||
def main():
|
||||
"""CLI for report generation."""
|
||||
"""Main entry point for CLI usage."""
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Generate formatted PDF reports for Biomni analyses'
|
||||
description="Generate enhanced reports from biomni conversation histories"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'input',
|
||||
type=str,
|
||||
help='Input markdown file or conversation history'
|
||||
'--input',
|
||||
type=Path,
|
||||
required=True,
|
||||
help='Input conversation history JSON file'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-o', '--output',
|
||||
type=str,
|
||||
default='biomni_report.pdf',
|
||||
help='Output PDF path (default: biomni_report.pdf)'
|
||||
'--output',
|
||||
type=Path,
|
||||
required=True,
|
||||
help='Output report file path'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'-m', '--method',
|
||||
type=str,
|
||||
choices=['weasyprint', 'pandoc'],
|
||||
default='weasyprint',
|
||||
help='Conversion method (default: weasyprint)'
|
||||
'--format',
|
||||
choices=['markdown', 'html', 'pdf'],
|
||||
default='markdown',
|
||||
help='Output format (default: markdown)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--css',
|
||||
'--title',
|
||||
type=str,
|
||||
help='Custom CSS stylesheet path'
|
||||
help='Report title (optional)'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Check if input is markdown or conversation history
|
||||
input_path = Path(args.input)
|
||||
|
||||
if not input_path.exists():
|
||||
print(f"Error: Input file not found: {args.input}")
|
||||
return 1
|
||||
|
||||
# If input is markdown, convert directly
|
||||
if input_path.suffix == '.md':
|
||||
if args.method == 'weasyprint':
|
||||
success = convert_to_pdf_weasyprint(
|
||||
str(input_path),
|
||||
args.output,
|
||||
args.css
|
||||
)
|
||||
else:
|
||||
success = convert_to_pdf_pandoc(str(input_path), args.output)
|
||||
|
||||
return 0 if success else 1
|
||||
|
||||
# Otherwise, assume it's conversation history (JSON)
|
||||
# Load conversation data
|
||||
try:
|
||||
import json
|
||||
with open(input_path) as f:
|
||||
history = json.load(f)
|
||||
|
||||
success = create_biomni_report(
|
||||
history,
|
||||
args.output,
|
||||
args.method
|
||||
)
|
||||
|
||||
return 0 if success else 1
|
||||
|
||||
with open(args.input, 'r') as f:
|
||||
conversation_data = json.load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"❌ Input file not found: {args.input}")
|
||||
return 1
|
||||
except json.JSONDecodeError:
|
||||
print("Error: Input file is not valid JSON or markdown")
|
||||
print(f"❌ Invalid JSON in input file: {args.input}")
|
||||
return 1
|
||||
|
||||
# Generate report
|
||||
try:
|
||||
generate_report(
|
||||
conversation_data,
|
||||
args.output,
|
||||
format=args.format,
|
||||
title=args.title
|
||||
)
|
||||
return 0
|
||||
except Exception as e:
|
||||
print(f"❌ Error generating report: {e}")
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
sys.exit(main())
|
||||
|
||||
457
scientific-packages/biomni/scripts/setup_environment.py
Normal file → Executable file
457
scientific-packages/biomni/scripts/setup_environment.py
Normal file → Executable file
@@ -1,230 +1,355 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Biomni Environment Setup and Validation Script
|
||||
Interactive setup script for biomni environment configuration.
|
||||
|
||||
This script helps users set up and validate their Biomni environment,
|
||||
including checking dependencies, API keys, and data availability.
|
||||
This script helps users set up:
|
||||
1. Conda environment with required dependencies
|
||||
2. API keys for LLM providers
|
||||
3. Data lake directory configuration
|
||||
4. MCP server setup (optional)
|
||||
|
||||
Usage:
|
||||
python setup_environment.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
def check_python_version() -> Tuple[bool, str]:
    """
    Report whether the running interpreter is Python 3 with minor version >= 8.

    Returns:
        (ok, message): ok is True for a compatible interpreter; message is a
        human-readable status line.
    """
    info = sys.version_info
    major, minor, micro = info.major, info.minor, info.micro

    # Guard clause: anything that is not 3.x with x >= 8 is rejected.
    if major != 3 or minor < 8:
        return False, f"Python {major}.{minor} - requires Python 3.8+"

    return True, f"Python {major}.{minor}.{micro} ✓"
|
||||
|
||||
|
||||
def check_conda_env() -> Tuple[bool, str]:
    """
    Report whether the active conda environment is ``biomni_e1``.

    The environment name is read from the ``CONDA_DEFAULT_ENV`` variable,
    which is None when unset.

    Returns:
        (ok, message): ok is True only when the variable equals 'biomni_e1'.
    """
    active_env = os.environ.get('CONDA_DEFAULT_ENV')
    in_expected_env = active_env == 'biomni_e1'

    message = (
        f"Conda environment: {active_env} ✓"
        if in_expected_env
        else f"Not in biomni_e1 environment (current: {active_env})"
    )
    return in_expected_env, message
|
||||
|
||||
|
||||
def check_package_installed(package: str) -> bool:
|
||||
"""Check if a Python package is installed."""
|
||||
def check_conda_installed() -> bool:
|
||||
"""Check if conda is available in the system."""
|
||||
try:
|
||||
__import__(package)
|
||||
subprocess.run(
|
||||
['conda', '--version'],
|
||||
capture_output=True,
|
||||
check=True
|
||||
)
|
||||
return True
|
||||
except ImportError:
|
||||
except (subprocess.CalledProcessError, FileNotFoundError):
|
||||
return False
|
||||
|
||||
|
||||
def check_dependencies() -> Tuple[bool, List[str]]:
|
||||
"""Check for required and optional dependencies."""
|
||||
required = ['biomni']
|
||||
optional = ['weasyprint', 'markdown2pdf']
|
||||
def setup_conda_environment():
|
||||
"""Guide user through conda environment setup."""
|
||||
print("\n=== Conda Environment Setup ===")
|
||||
|
||||
missing_required = [pkg for pkg in required if not check_package_installed(pkg)]
|
||||
missing_optional = [pkg for pkg in optional if not check_package_installed(pkg)]
|
||||
if not check_conda_installed():
|
||||
print("❌ Conda not found. Please install Miniconda or Anaconda:")
|
||||
print(" https://docs.conda.io/en/latest/miniconda.html")
|
||||
return False
|
||||
|
||||
messages = []
|
||||
success = len(missing_required) == 0
|
||||
print("✓ Conda is installed")
|
||||
|
||||
if missing_required:
|
||||
messages.append(f"Missing required packages: {', '.join(missing_required)}")
|
||||
messages.append("Install with: pip install biomni --upgrade")
|
||||
# Check if biomni_e1 environment exists
|
||||
result = subprocess.run(
|
||||
['conda', 'env', 'list'],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
if 'biomni_e1' in result.stdout:
|
||||
print("✓ biomni_e1 environment already exists")
|
||||
return True
|
||||
|
||||
print("\nCreating biomni_e1 conda environment...")
|
||||
print("This will install Python 3.10 and required dependencies.")
|
||||
|
||||
response = input("Proceed? [y/N]: ").strip().lower()
|
||||
if response != 'y':
|
||||
print("Skipping conda environment setup")
|
||||
return False
|
||||
|
||||
try:
|
||||
# Create conda environment
|
||||
subprocess.run(
|
||||
['conda', 'create', '-n', 'biomni_e1', 'python=3.10', '-y'],
|
||||
check=True
|
||||
)
|
||||
|
||||
print("\n✓ Conda environment created successfully")
|
||||
print("\nTo activate: conda activate biomni_e1")
|
||||
print("Then install biomni: pip install biomni --upgrade")
|
||||
return True
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"❌ Failed to create conda environment: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def setup_api_keys() -> Dict[str, str]:
    """
    Prompt on stdin for API keys of the supported LLM providers.

    Providers are asked in a fixed order (Anthropic, OpenAI, Google Gemini,
    Groq); an empty answer skips that provider.

    Returns:
        Mapping of environment-variable name to the entered key, containing
        only the providers that were not skipped (empty if all were skipped,
        in which case a warning is printed).
    """
    print("\n=== API Key Configuration ===")
    print("Biomni supports multiple LLM providers.")
    print("At minimum, configure one provider.")

    # (display label, where to obtain a key, environment variable name)
    providers = (
        ("Anthropic Claude (Recommended)", "https://console.anthropic.com/", "ANTHROPIC_API_KEY"),
        ("OpenAI", "https://platform.openai.com/api-keys", "OPENAI_API_KEY"),
        ("Google Gemini", "https://makersuite.google.com/app/apikey", "GOOGLE_API_KEY"),
        ("Groq", "https://console.groq.com/keys", "GROQ_API_KEY"),
    )

    collected = {}
    for number, (label, url, env_var) in enumerate(providers, start=1):
        print(f"\n{number}. {label}")
        print(f" Get your API key from: {url}")
        entered = input(f" Enter {env_var} (or press Enter to skip): ").strip()
        if entered:
            collected[env_var] = entered

    if not collected:
        print("\n⚠️ No API keys configured. You'll need at least one to use biomni.")

    return collected
|
||||
|
||||
|
||||
def save_api_keys(api_keys: Dict[str, str], method: str = 'env_file'):
    """
    Save API keys using the specified method.

    Args:
        api_keys: Mapping of environment-variable name to key value
        method: 'env_file' merges the keys into ``.env`` in the current
            working directory (existing KEY=VALUE entries are kept, new
            values win; comments and lines without '=' are not carried
            over). 'shell_export' only prints ``export`` lines for the user
            to add to their shell configuration. Any other value does
            nothing.
    """
    if method == 'env_file':
        env_file = Path.cwd() / '.env'

        # Read existing .env if present
        merged = {}
        if env_file.exists():
            with open(env_file, 'r', encoding='utf-8') as f:
                for raw in f:
                    entry = raw.strip()
                    if not entry or entry.startswith('#') or '=' not in entry:
                        continue
                    key, val = entry.split('=', 1)
                    merged[key.strip()] = val.strip()

        # Update with new keys
        merged.update(api_keys)

        # The file holds secrets: make sure it exists with owner-only
        # permissions *before* any key material is written into it.
        env_file.touch(mode=0o600, exist_ok=True)
        env_file.chmod(0o600)

        # Write to .env
        with open(env_file, 'w', encoding='utf-8') as f:
            f.write("# Biomni API Keys\n")
            f.write("# Generated by setup_environment.py\n\n")
            f.writelines(f"{key}={value}\n" for key, value in merged.items())

        print(f"\n✓ API keys saved to {env_file}")
        print(" Keys will be loaded automatically when biomni runs in this directory")

    elif method == 'shell_export':
        shell_file = Path.home() / '.bashrc'  # or .zshrc for zsh users

        print("\n📋 Add these lines to your shell configuration:")
        for key, value in api_keys.items():
            print(f" export {key}=\"{value}\"")

        print(f"\nThen run: source {shell_file}")
|
||||
|
||||
|
||||
def setup_data_directory() -> Optional[Path]:
|
||||
"""Configure biomni data lake directory."""
|
||||
print("\n=== Data Lake Configuration ===")
|
||||
print("Biomni requires ~11GB for integrated biomedical databases.")
|
||||
|
||||
default_path = Path.cwd() / 'biomni_data'
|
||||
print(f"\nDefault location: {default_path}")
|
||||
|
||||
response = input("Use default location? [Y/n]: ").strip().lower()
|
||||
|
||||
if response == 'n':
|
||||
custom_path = input("Enter custom path: ").strip()
|
||||
data_path = Path(custom_path).expanduser().resolve()
|
||||
else:
|
||||
messages.append("Required packages: ✓")
|
||||
data_path = default_path
|
||||
|
||||
if missing_optional:
|
||||
messages.append(f"Missing optional packages: {', '.join(missing_optional)}")
|
||||
messages.append("For PDF reports, install: pip install weasyprint")
|
||||
# Create directory if it doesn't exist
|
||||
data_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
return success, messages
|
||||
print(f"\n✓ Data directory configured: {data_path}")
|
||||
print(" Data will be downloaded automatically on first use")
|
||||
|
||||
return data_path
|
||||
|
||||
|
||||
def check_api_keys() -> Tuple[bool, Dict[str, bool]]:
|
||||
"""Check which API keys are configured."""
|
||||
api_keys = {
|
||||
'ANTHROPIC_API_KEY': os.environ.get('ANTHROPIC_API_KEY'),
|
||||
'OPENAI_API_KEY': os.environ.get('OPENAI_API_KEY'),
|
||||
'GEMINI_API_KEY': os.environ.get('GEMINI_API_KEY'),
|
||||
'GROQ_API_KEY': os.environ.get('GROQ_API_KEY'),
|
||||
}
|
||||
def test_installation(data_path: Path):
|
||||
"""Test biomni installation with a simple query."""
|
||||
print("\n=== Installation Test ===")
|
||||
print("Testing biomni installation with a simple query...")
|
||||
|
||||
configured = {key: bool(value) for key, value in api_keys.items()}
|
||||
has_any = any(configured.values())
|
||||
response = input("Run test? [Y/n]: ").strip().lower()
|
||||
if response == 'n':
|
||||
print("Skipping test")
|
||||
return
|
||||
|
||||
return has_any, configured
|
||||
test_code = f'''
|
||||
import os
|
||||
from biomni.agent import A1
|
||||
|
||||
# Use environment variables for API keys
|
||||
agent = A1(path='{data_path}', llm='claude-sonnet-4-20250514')
|
||||
|
||||
def check_data_directory(data_path: str = './data') -> Tuple[bool, str]:
|
||||
"""Check if Biomni data directory exists and has content."""
|
||||
path = Path(data_path)
|
||||
# Simple test query
|
||||
result = agent.go("What is the primary function of the TP53 gene?")
|
||||
print("Test result:", result)
|
||||
'''
|
||||
|
||||
if not path.exists():
|
||||
return False, f"Data directory not found at {data_path}"
|
||||
test_file = Path('test_biomni.py')
|
||||
with open(test_file, 'w') as f:
|
||||
f.write(test_code)
|
||||
|
||||
# Check if directory has files (data has been downloaded)
|
||||
files = list(path.glob('*'))
|
||||
if len(files) == 0:
|
||||
return False, f"Data directory exists but is empty. Run agent once to download."
|
||||
print(f"\nTest script created: {test_file}")
|
||||
print("Running test...")
|
||||
|
||||
# Rough size check (should be ~11GB)
|
||||
total_size = sum(f.stat().st_size for f in path.rglob('*') if f.is_file())
|
||||
size_gb = total_size / (1024**3)
|
||||
|
||||
if size_gb < 1:
|
||||
return False, f"Data directory exists but seems incomplete ({size_gb:.1f} GB)"
|
||||
|
||||
return True, f"Data directory: {data_path} ({size_gb:.1f} GB) ✓"
|
||||
|
||||
|
||||
def check_disk_space(required_gb: float = 20) -> Tuple[bool, str]:
|
||||
"""Check if sufficient disk space is available."""
|
||||
try:
|
||||
import shutil
|
||||
stat = shutil.disk_usage('.')
|
||||
free_gb = stat.free / (1024**3)
|
||||
|
||||
if free_gb >= required_gb:
|
||||
return True, f"Disk space: {free_gb:.1f} GB available ✓"
|
||||
else:
|
||||
return False, f"Low disk space: {free_gb:.1f} GB (need {required_gb} GB)"
|
||||
except Exception as e:
|
||||
return False, f"Could not check disk space: {e}"
|
||||
subprocess.run([sys.executable, str(test_file)], check=True)
|
||||
print("\n✓ Test completed successfully!")
|
||||
test_file.unlink() # Clean up test file
|
||||
except subprocess.CalledProcessError:
|
||||
print("\n❌ Test failed. Check your configuration.")
|
||||
print(f" Test script saved as {test_file} for debugging")
|
||||
|
||||
|
||||
def test_biomni_import() -> Tuple[bool, str]:
    """Check that the Biomni package can be imported.

    Only the imports are attempted: ``A1`` from ``biomni.agent`` and
    ``default_config`` from ``biomni.config``. No agent is constructed and
    no configuration is read.

    Returns:
        A ``(ok, message)`` pair. ``ok`` is True when both imports succeed;
        otherwise ``message`` carries the text of the exception raised.
    """
    try:
        # Imported only to prove the names resolve; they are not used here.
        from biomni.agent import A1  # noqa: F401
        from biomni.config import default_config  # noqa: F401
    except ImportError as e:
        return False, f"Cannot import Biomni: {e}"
    except Exception as e:
        # Importing runs the package's module-level code, which may raise
        # something other than ImportError; report it rather than crash.
        return False, f"Biomni import error: {e}"

    return True, "Biomni import successful ✓"
|
||||
def generate_example_script(data_path: Path):
|
||||
"""Generate example usage script."""
|
||||
example_code = f'''#!/usr/bin/env python3
|
||||
"""
|
||||
Example biomni usage script
|
||||
|
||||
This demonstrates basic biomni usage patterns.
|
||||
Modify this script for your research tasks.
|
||||
"""
|
||||
|
||||
def suggest_fixes(results: Dict[str, Tuple[bool, any]]) -> List[str]:
|
||||
"""Generate suggestions for fixing issues."""
|
||||
suggestions = []
|
||||
from biomni.agent import A1
|
||||
|
||||
if not results['python'][0]:
|
||||
suggestions.append("➜ Upgrade Python to 3.8 or higher")
|
||||
# Initialize agent
|
||||
agent = A1(
|
||||
path='{data_path}',
|
||||
llm='claude-sonnet-4-20250514' # or your preferred LLM
|
||||
)
|
||||
|
||||
if not results['conda'][0]:
|
||||
suggestions.append("➜ Activate biomni environment: conda activate biomni_e1")
|
||||
# Example 1: Simple gene query
|
||||
print("Example 1: Gene function query")
|
||||
result = agent.go("""
|
||||
What are the main functions of the BRCA1 gene?
|
||||
Include information about:
|
||||
- Molecular function
|
||||
- Associated diseases
|
||||
- Protein interactions
|
||||
""")
|
||||
print(result)
|
||||
print("-" * 80)
|
||||
|
||||
if not results['dependencies'][0]:
|
||||
suggestions.append("➜ Install Biomni: pip install biomni --upgrade")
|
||||
# Example 2: Data analysis
|
||||
print("\\nExample 2: GWAS analysis")
|
||||
result = agent.go("""
|
||||
Explain how to analyze GWAS summary statistics for:
|
||||
1. Identifying genome-wide significant variants
|
||||
2. Mapping variants to genes
|
||||
3. Pathway enrichment analysis
|
||||
""")
|
||||
print(result)
|
||||
|
||||
if not results['api_keys'][0]:
|
||||
suggestions.append("➜ Set API key: export ANTHROPIC_API_KEY='your-key'")
|
||||
suggestions.append(" Or create .env file with API keys")
|
||||
# Save conversation history
|
||||
agent.save_conversation_history("example_results.pdf")
|
||||
print("\\nResults saved to example_results.pdf")
|
||||
'''
|
||||
|
||||
if not results['data'][0]:
|
||||
suggestions.append("➜ Data will auto-download on first agent.go() call")
|
||||
example_file = Path('example_biomni_usage.py')
|
||||
with open(example_file, 'w') as f:
|
||||
f.write(example_code)
|
||||
|
||||
if not results['disk_space'][0]:
|
||||
suggestions.append("➜ Free up disk space (need ~20GB total)")
|
||||
|
||||
return suggestions
|
||||
print(f"\n✓ Example script created: {example_file}")
|
||||
|
||||
|
||||
def main():
|
||||
"""Run all environment checks and display results."""
|
||||
"""Main setup workflow."""
|
||||
print("=" * 60)
|
||||
print("Biomni Environment Validation")
|
||||
print("Biomni Environment Setup")
|
||||
print("=" * 60)
|
||||
print()
|
||||
|
||||
# Run all checks
|
||||
results = {}
|
||||
# Step 1: Conda environment
|
||||
conda_success = setup_conda_environment()
|
||||
|
||||
print("Checking Python version...")
|
||||
results['python'] = check_python_version()
|
||||
print(f" {results['python'][1]}")
|
||||
print()
|
||||
if conda_success:
|
||||
print("\n⚠️ Remember to activate the environment:")
|
||||
print(" conda activate biomni_e1")
|
||||
print(" pip install biomni --upgrade")
|
||||
|
||||
print("Checking conda environment...")
|
||||
results['conda'] = check_conda_env()
|
||||
print(f" {results['conda'][1]}")
|
||||
print()
|
||||
# Step 2: API keys
|
||||
api_keys = setup_api_keys()
|
||||
|
||||
print("Checking dependencies...")
|
||||
results['dependencies'] = check_dependencies()
|
||||
for msg in results['dependencies'][1]:
|
||||
print(f" {msg}")
|
||||
print()
|
||||
if api_keys:
|
||||
print("\nHow would you like to store API keys?")
|
||||
print("1. .env file (recommended, local to this directory)")
|
||||
print("2. Shell export (add to .bashrc/.zshrc)")
|
||||
|
||||
print("Checking API keys...")
|
||||
results['api_keys'] = check_api_keys()
|
||||
has_keys, key_status = results['api_keys']
|
||||
for key, configured in key_status.items():
|
||||
status = "✓" if configured else "✗"
|
||||
print(f" {key}: {status}")
|
||||
print()
|
||||
choice = input("Choose [1/2]: ").strip()
|
||||
|
||||
print("Checking Biomni data directory...")
|
||||
results['data'] = check_data_directory()
|
||||
print(f" {results['data'][1]}")
|
||||
print()
|
||||
if choice == '2':
|
||||
save_api_keys(api_keys, method='shell_export')
|
||||
else:
|
||||
save_api_keys(api_keys, method='env_file')
|
||||
|
||||
print("Checking disk space...")
|
||||
results['disk_space'] = check_disk_space()
|
||||
print(f" {results['disk_space'][1]}")
|
||||
print()
|
||||
# Step 3: Data directory
|
||||
data_path = setup_data_directory()
|
||||
|
||||
print("Testing Biomni import...")
|
||||
results['biomni_import'] = test_biomni_import()
|
||||
print(f" {results['biomni_import'][1]}")
|
||||
print()
|
||||
# Step 4: Generate example script
|
||||
if data_path:
|
||||
generate_example_script(data_path)
|
||||
|
||||
# Step 5: Test installation (optional)
|
||||
if api_keys and data_path:
|
||||
test_installation(data_path)
|
||||
|
||||
# Summary
|
||||
print("\n" + "=" * 60)
|
||||
print("Setup Complete!")
|
||||
print("=" * 60)
|
||||
all_passed = all(result[0] for result in results.values())
|
||||
|
||||
if all_passed:
|
||||
print("✓ All checks passed! Environment is ready.")
|
||||
print()
|
||||
print("Quick start:")
|
||||
print(" from biomni.agent import A1")
|
||||
print(" agent = A1(path='./data', llm='claude-sonnet-4-20250514')")
|
||||
print(" agent.go('Your biomedical task')")
|
||||
if conda_success:
|
||||
print("✓ Conda environment: biomni_e1")
|
||||
|
||||
if api_keys:
|
||||
print(f"✓ API keys configured: {', '.join(api_keys.keys())}")
|
||||
|
||||
if data_path:
|
||||
print(f"✓ Data directory: {data_path}")
|
||||
|
||||
print("\nNext steps:")
|
||||
if conda_success:
|
||||
print("1. conda activate biomni_e1")
|
||||
print("2. pip install biomni --upgrade")
|
||||
print("3. Run example_biomni_usage.py to test")
|
||||
else:
|
||||
print("⚠ Some checks failed. See suggestions below:")
|
||||
print()
|
||||
suggestions = suggest_fixes(results)
|
||||
for suggestion in suggestions:
|
||||
print(suggestion)
|
||||
print("1. Install conda/miniconda")
|
||||
print("2. Run this script again")
|
||||
|
||||
print("=" * 60)
|
||||
|
||||
return 0 if all_passed else 1
|
||||
print("\nFor documentation, see:")
|
||||
print(" - GitHub: https://github.com/snap-stanford/biomni")
|
||||
print(" - Paper: https://www.biorxiv.org/content/10.1101/2025.05.30.656746v1")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
try:
|
||||
main()
|
||||
except KeyboardInterrupt:
|
||||
print("\n\nSetup interrupted by user")
|
||||
sys.exit(1)
|
||||
except Exception as e:
|
||||
print(f"\n❌ Error during setup: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
Reference in New Issue
Block a user