Add more scientific skills

This commit is contained in:
Timothy Kassis
2025-10-19 14:12:02 -07:00
parent 78d5ac2b56
commit 660c8574d0
210 changed files with 88957 additions and 1 deletions

View File

@@ -0,0 +1,381 @@
#!/usr/bin/env python3
"""
Enhanced PDF Report Generation for Biomni
This script provides advanced PDF report generation with custom formatting,
styling, and metadata for Biomni analysis results.
"""
import argparse
import sys
from pathlib import Path
from datetime import datetime
from typing import Optional, Dict, Any
def generate_markdown_report(
    title: str,
    sections: list,
    metadata: Optional[Dict[str, Any]] = None,
    output_path: str = "report.md"
) -> str:
    """
    Generate a formatted markdown report and write it to disk.

    Args:
        title: Report title, rendered as the top-level heading.
        sections: List of dicts with 'heading' and 'content' keys and an
            optional 'level' key (heading depth, defaults to 2).
        metadata: Optional metadata dict (author, date, etc.), rendered as
            bold "key: value" lines between two horizontal rules.
        output_path: Path to save markdown file. Missing parent directories
            are created.

    Returns:
        Path to generated markdown file
    """
    # Title
    md_content = [f"# {title}\n"]

    # Metadata
    if metadata:
        md_content.append("---\n")
        md_content.extend(
            f"**{key}:** {value} \n" for key, value in metadata.items()
        )
        md_content.append("---\n\n")

    # Sections
    for section in sections:
        heading = section.get('heading', 'Section')
        content = section.get('content', '')
        level = section.get('level', 2)  # Default to h2
        md_content.append(f"{'#' * level} {heading}\n\n")
        md_content.append(f"{content}\n\n")

    # Write to file
    output = Path(output_path)
    # Allow output paths inside directories that do not exist yet.
    output.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8 so non-ASCII report text does not depend on the
    # platform's default encoding.
    output.write_text('\n'.join(md_content), encoding='utf-8')
    return str(output)
def convert_to_pdf_weasyprint(
    markdown_path: str,
    output_path: str,
    css_style: Optional[str] = None
) -> bool:
    """
    Convert markdown to PDF using WeasyPrint.

    The markdown file is rendered to HTML with the `markdown` package,
    wrapped in a built-in styled HTML template, and written as a PDF.

    Args:
        markdown_path: Path to markdown file (read as UTF-8)
        output_path: Path for output PDF
        css_style: Optional CSS stylesheet path, applied in addition to the
            built-in styles. A warning is printed if the file does not exist.

    Returns:
        True if successful, False otherwise (an error message is printed)
    """
    try:
        import markdown
        from weasyprint import HTML, CSS

        # Read markdown as UTF-8 to match the charset declared in the
        # HTML template below.
        with open(markdown_path, 'r', encoding='utf-8') as f:
            md_content = f.read()

        # Convert to HTML
        html_content = markdown.markdown(
            md_content,
            extensions=['tables', 'fenced_code', 'codehilite']
        )

        # Wrap in HTML template (CSS braces are doubled because this is an
        # f-string).
        html_template = f"""
        <!DOCTYPE html>
        <html>
        <head>
        <meta charset="utf-8">
        <title>Biomni Report</title>
        <style>
        body {{
        font-family: 'Helvetica', 'Arial', sans-serif;
        line-height: 1.6;
        color: #333;
        max-width: 800px;
        margin: 40px auto;
        padding: 20px;
        }}
        h1 {{
        color: #2c3e50;
        border-bottom: 3px solid #3498db;
        padding-bottom: 10px;
        }}
        h2 {{
        color: #34495e;
        margin-top: 30px;
        border-bottom: 1px solid #bdc3c7;
        padding-bottom: 5px;
        }}
        h3 {{
        color: #7f8c8d;
        }}
        code {{
        background-color: #f4f4f4;
        padding: 2px 6px;
        border-radius: 3px;
        font-family: 'Courier New', monospace;
        }}
        pre {{
        background-color: #f4f4f4;
        padding: 15px;
        border-radius: 5px;
        overflow-x: auto;
        }}
        table {{
        border-collapse: collapse;
        width: 100%;
        margin: 20px 0;
        }}
        th, td {{
        border: 1px solid #ddd;
        padding: 12px;
        text-align: left;
        }}
        th {{
        background-color: #3498db;
        color: white;
        }}
        tr:nth-child(even) {{
        background-color: #f9f9f9;
        }}
        .metadata {{
        background-color: #ecf0f1;
        padding: 15px;
        border-radius: 5px;
        margin: 20px 0;
        }}
        </style>
        </head>
        <body>
        {html_content}
        </body>
        </html>
        """

        # Generate PDF
        pdf = HTML(string=html_template)

        # Add custom CSS if provided; tell the user when the requested
        # stylesheet cannot be found instead of silently ignoring it.
        stylesheets = []
        if css_style:
            if Path(css_style).exists():
                stylesheets.append(CSS(filename=css_style))
            else:
                print(f"Warning: CSS stylesheet not found, using default styling: {css_style}")

        pdf.write_pdf(output_path, stylesheets=stylesheets)
        return True
    except ImportError as e:
        # Either `markdown` or `weasyprint` may be the missing package.
        missing = e.name or "WeasyPrint"
        print(f"Error: {missing} not installed. Install with: pip install weasyprint markdown")
        return False
    except Exception as e:
        # Best-effort conversion: report any rendering/IO failure and signal
        # it through the return value rather than crashing the CLI.
        print(f"Error generating PDF: {e}")
        return False
def convert_to_pdf_pandoc(markdown_path: str, output_path: str) -> bool:
    """
    Render a markdown file to PDF by shelling out to Pandoc.

    Pandoc is invoked with the pdflatex engine, 1-inch margins and a table
    of contents.

    Args:
        markdown_path: Path to markdown file
        output_path: Path for output PDF

    Returns:
        True if successful, False otherwise (an error message is printed)
    """
    try:
        import subprocess

        # Probe for a working pandoc executable before converting.
        probe = subprocess.run(
            ['pandoc', '--version'],
            capture_output=True,
            text=True
        )
        if probe.returncode != 0:
            print("Error: Pandoc not installed")
            return False

        # Run the actual conversion.
        command = [
            'pandoc',
            markdown_path,
            '-o', output_path,
            '--pdf-engine=pdflatex',
            '-V', 'geometry:margin=1in',
            '--toc'
        ]
        conversion = subprocess.run(command, capture_output=True, text=True)
        if conversion.returncode == 0:
            return True

        print(f"Pandoc error: {conversion.stderr}")
        return False
    except FileNotFoundError:
        # Raised by subprocess when the pandoc executable is not on PATH.
        print("Error: Pandoc not found. Install from https://pandoc.org/")
        return False
    except Exception as e:
        print(f"Error: {e}")
        return False
def create_biomni_report(
    conversation_history: list,
    output_path: str = "biomni_report.pdf",
    method: str = "weasyprint"
) -> bool:
    """
    Create a formatted PDF report from Biomni conversation history.

    An intermediate markdown file is written next to the PDF (same name
    with a '.md' suffix) and then converted with the chosen method.

    Args:
        conversation_history: List of conversation turns; each turn is a
            dict read via the optional keys 'task', 'input' and 'output'.
        output_path: Output PDF path
        method: Conversion method ('weasyprint' or 'pandoc')

    Returns:
        True if successful, False if the method is unknown or the
        conversion failed
    """
    # Reject unsupported methods up front, before anything is written.
    if method not in ('weasyprint', 'pandoc'):
        print(f"Unknown method: {method}")
        return False

    # Prepare report sections
    metadata = {
        'Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'Tool': 'Biomni AI Agent',
        'Report Type': 'Analysis Summary'
    }

    # Executive Summary
    sections = [{
        'heading': 'Executive Summary',
        'level': 2,
        'content': 'This report contains the complete analysis workflow executed by the Biomni biomedical AI agent.'
    }]

    # Conversation history
    for i, turn in enumerate(conversation_history, 1):
        sections.append({
            'heading': f'Task {i}: {turn.get("task", "Analysis")}',
            'level': 2,
            'content': f'**Input:**\n```\n{turn.get("input", "")}\n```\n\n**Output:**\n{turn.get("output", "")}'
        })

    # Derive the markdown path from the final suffix only. A plain
    # str.replace('.pdf', '.md') would also rewrite '.pdf' inside directory
    # names and would return the PDF path itself when there is no '.pdf'.
    pdf_file = Path(output_path)
    md_file = pdf_file.with_suffix('.md')
    if md_file == pdf_file:
        # Output already ends in '.md'; keep the two files distinct.
        md_file = pdf_file.with_name(pdf_file.name + '.md')
    md_path = str(md_file)

    # Generate markdown
    generate_markdown_report(
        title="Biomni Analysis Report",
        sections=sections,
        metadata=metadata,
        output_path=md_path
    )

    # Convert to PDF
    if method == 'weasyprint':
        success = convert_to_pdf_weasyprint(md_path, output_path)
    else:
        success = convert_to_pdf_pandoc(md_path, output_path)

    if success:
        print(f"✓ Report generated: {output_path}")
        print(f" Markdown: {md_path}")
    else:
        print("✗ Failed to generate PDF")
        print(f" Markdown available: {md_path}")
    return success
def main():
    """
    CLI for report generation.

    A markdown input ('.md' / '.markdown', case-insensitive) is converted
    directly; any other input is parsed as a JSON conversation history
    (a list of objects) and turned into a report.

    Returns:
        Process exit code: 0 on success, 1 on any failure
    """
    import json

    parser = argparse.ArgumentParser(
        description='Generate formatted PDF reports for Biomni analyses'
    )
    parser.add_argument(
        'input',
        type=str,
        help='Input markdown file or conversation history'
    )
    parser.add_argument(
        '-o', '--output',
        type=str,
        default='biomni_report.pdf',
        help='Output PDF path (default: biomni_report.pdf)'
    )
    parser.add_argument(
        '-m', '--method',
        type=str,
        choices=['weasyprint', 'pandoc'],
        default='weasyprint',
        help='Conversion method (default: weasyprint)'
    )
    parser.add_argument(
        '--css',
        type=str,
        help='Custom CSS stylesheet path'
    )
    args = parser.parse_args()

    # is_file() also rejects directories, which would otherwise pass an
    # exists() check and then crash in open().
    input_path = Path(args.input)
    if not input_path.is_file():
        print(f"Error: Input file not found: {args.input}")
        return 1

    is_markdown = input_path.suffix.lower() in ('.md', '.markdown')

    # The stylesheet is only honoured on the markdown + weasyprint path;
    # say so instead of dropping the option silently.
    if args.css and not (is_markdown and args.method == 'weasyprint'):
        print("Warning: --css is only applied when converting a markdown file with weasyprint; ignoring it")

    # If input is markdown, convert directly
    if is_markdown:
        if args.method == 'weasyprint':
            success = convert_to_pdf_weasyprint(
                str(input_path),
                args.output,
                args.css
            )
        else:
            success = convert_to_pdf_pandoc(str(input_path), args.output)
        return 0 if success else 1

    # Otherwise, assume it's conversation history (JSON)
    try:
        with open(input_path, encoding='utf-8') as f:
            history = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        print("Error: Input file is not valid JSON or markdown")
        return 1

    # The report builder reads each turn with dict.get(), so anything other
    # than a list of objects would crash there.
    if not isinstance(history, list) or not all(isinstance(turn, dict) for turn in history):
        print("Error: Conversation history must be a JSON list of objects")
        return 1

    success = create_biomni_report(
        history,
        args.output,
        args.method
    )
    return 0 if success else 1
# Script entry point: run the CLI and use its return value as the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,230 @@
#!/usr/bin/env python3
"""
Biomni Environment Setup and Validation Script
This script helps users set up and validate their Biomni environment,
including checking dependencies, API keys, and data availability.
"""
import os
import sys
import subprocess
from pathlib import Path
from typing import Dict, List, Tuple
def check_python_version(minimum: Tuple[int, int] = (3, 8)) -> Tuple[bool, str]:
    """
    Check if the running Python version is compatible.

    Args:
        minimum: Lowest acceptable (major, minor) version. Defaults to 3.8.

    Returns:
        (ok, message) where message describes the detected version and,
        on failure, the required minimum.
    """
    version = sys.version_info
    required = tuple(minimum[:2])
    # Tuple comparison handles future major versions correctly, unlike a
    # "major == 3 and minor >= 8" test.
    if tuple(version[:2]) >= required:
        return True, f"Python {version.major}.{version.minor}.{version.micro}"
    return False, (
        f"Python {version.major}.{version.minor} - "
        f"requires Python {required[0]}.{required[1]}+"
    )
def check_conda_env(expected_env: str = 'biomni_e1') -> Tuple[bool, str]:
    """
    Check if running in the expected conda environment.

    The active environment is read from the CONDA_DEFAULT_ENV environment
    variable.

    Args:
        expected_env: Name of the conda environment that should be active.
            Defaults to 'biomni_e1'.

    Returns:
        (ok, message) describing the active environment.
    """
    conda_env = os.environ.get('CONDA_DEFAULT_ENV')
    if conda_env == expected_env:
        return True, f"Conda environment: {conda_env}"
    return False, f"Not in {expected_env} environment (current: {conda_env})"
def check_package_installed(package: str) -> bool:
    """
    Check if a Python package is installed.

    The package is located through the import system without executing it,
    so a package with heavy or failing import-time side effects is still
    reported as installed.

    Args:
        package: Importable module name (e.g. 'biomni').

    Returns:
        True if the module can be found, False otherwise (including for an
        empty or malformed name).
    """
    import importlib.util

    # Already-imported modules are trivially installed; this also avoids
    # find_spec() raising ValueError for modules whose __spec__ is None.
    if sys.modules.get(package) is not None:
        return True
    try:
        return importlib.util.find_spec(package) is not None
    except (ImportError, ValueError):
        # ImportError: a parent package of a dotted name is missing.
        # ValueError: empty or otherwise invalid module name.
        return False
def check_dependencies() -> Tuple[bool, List[str]]:
    """
    Report which required and optional packages are missing.

    Returns:
        (ok, messages): ok is True when every required package is
        installed; messages are human-readable status and install hints.
    """
    required = ['biomni']
    optional = ['weasyprint', 'markdown2pdf']

    absent_required = [name for name in required if not check_package_installed(name)]
    absent_optional = [name for name in optional if not check_package_installed(name)]

    report = []
    if absent_required:
        report.append(f"Missing required packages: {', '.join(absent_required)}")
        report.append("Install with: pip install biomni --upgrade")
    else:
        report.append("Required packages: ✓")

    # Optional packages never affect the overall result; they only add hints.
    if absent_optional:
        report.append(f"Missing optional packages: {', '.join(absent_optional)}")
        report.append("For PDF reports, install: pip install weasyprint")

    return not absent_required, report
def check_api_keys() -> Tuple[bool, Dict[str, bool]]:
    """
    Report which LLM provider API keys are set in the environment.

    Returns:
        (has_any, configured): configured maps each key name to whether the
        environment variable holds a non-empty value; has_any is True when
        at least one of them does.
    """
    key_names = (
        'ANTHROPIC_API_KEY',
        'OPENAI_API_KEY',
        'GEMINI_API_KEY',
        'GROQ_API_KEY',
    )
    # An unset variable (None) and an empty string both count as missing.
    configured = {name: bool(os.environ.get(name)) for name in key_names}
    return any(configured.values()), configured
def check_data_directory(data_path: str = './data', min_size_gb: float = 1.0) -> Tuple[bool, str]:
    """
    Check if Biomni data directory exists and has content.

    Args:
        data_path: Location of the data directory.
        min_size_gb: Total size (in GiB) below which the directory is
            reported as incomplete. Defaults to 1.

    Returns:
        (ok, message) describing the state of the data directory.
    """
    path = Path(data_path)
    if not path.exists():
        return False, f"Data directory not found at {data_path}"
    # A regular file at this path would otherwise be reported as an
    # "empty directory".
    if not path.is_dir():
        return False, f"Data path exists but is not a directory: {data_path}"

    # Check if directory has files (data has been downloaded)
    try:
        is_empty = next(path.iterdir(), None) is None
    except OSError as e:
        return False, f"Could not read data directory {data_path}: {e}"
    if is_empty:
        return False, "Data directory exists but is empty. Run agent once to download."

    # Rough size check (should be ~11GB)
    total_size = 0
    for entry in path.rglob('*'):
        try:
            if entry.is_file():
                total_size += entry.stat().st_size
        except OSError:
            # Entry vanished or is unreadable; skip it in this rough estimate.
            continue
    size_gb = total_size / (1024**3)
    if size_gb < min_size_gb:
        return False, f"Data directory exists but seems incomplete ({size_gb:.1f} GB)"
    return True, f"Data directory: {data_path} ({size_gb:.1f} GB) ✓"
def check_disk_space(required_gb: float = 20, path: str = '.') -> Tuple[bool, str]:
    """
    Check if sufficient disk space is available.

    Args:
        required_gb: Minimum free space, in GiB, needed to pass.
        path: Any path on the filesystem to inspect. Defaults to the
            current directory.

    Returns:
        (ok, message) with the free space found, or the reason the check
        could not be performed.
    """
    import shutil

    # Only the system call is guarded; the arithmetic below cannot fail.
    try:
        free_bytes = shutil.disk_usage(path).free
    except Exception as e:
        # Best-effort diagnostic: report the failure rather than abort the
        # whole validation run.
        return False, f"Could not check disk space: {e}"

    free_gb = free_bytes / (1024**3)
    if free_gb >= required_gb:
        return True, f"Disk space: {free_gb:.1f} GB available ✓"
    return False, f"Low disk space: {free_gb:.1f} GB (need {required_gb} GB)"
def test_biomni_import() -> Tuple[bool, str]:
    """
    Test if the Biomni agent and config modules can be imported.

    Only the imports are attempted; no agent is constructed.

    Returns:
        (ok, message) with the import error text on failure.
    """
    try:
        # Imported purely to prove the names resolve; they are not used.
        from biomni.agent import A1  # noqa: F401
        from biomni.config import default_config  # noqa: F401
    except ImportError as e:
        return False, f"Cannot import Biomni: {e}"
    except Exception as e:
        # Import-time failures other than ImportError (e.g. a broken
        # dependency raising during module initialisation).
        return False, f"Biomni import error: {e}"
    return True, "Biomni import successful ✓"


# This is an environment diagnostic, not a unit test: stop pytest from
# collecting it because of its "test_" prefix.
test_biomni_import.__test__ = False
def suggest_fixes(results: Dict[str, Tuple[bool, object]]) -> List[str]:
    """
    Generate suggestions for fixing issues.

    Args:
        results: Mapping of check name ('python', 'conda', 'dependencies',
            'api_keys', 'data', 'disk_space', 'biomni_import') to a tuple
            whose first element is the pass/fail flag. Checks absent from
            the mapping are treated as not run and produce no suggestion.

    Returns:
        Suggestion lines for every failed check, in a fixed order.
    """
    # Ordered (check name, suggestion lines) pairs; order defines output order.
    fixes_by_check = (
        ('python', ["➜ Upgrade Python to 3.8 or higher"]),
        ('conda', ["➜ Activate biomni environment: conda activate biomni_e1"]),
        ('dependencies', ["➜ Install Biomni: pip install biomni --upgrade"]),
        ('api_keys', [
            "➜ Set API key: export ANTHROPIC_API_KEY='your-key'",
            " Or create .env file with API keys",
        ]),
        ('data', ["➜ Data will auto-download on first agent.go() call"]),
        ('disk_space', ["➜ Free up disk space (need ~20GB total)"]),
        # Without this entry a lone import failure would yield no advice.
        ('biomni_import', [
            "➜ Verify the Biomni install: python -c 'from biomni.agent import A1'",
        ]),
    )

    suggestions = []
    for check, fixes in fixes_by_check:
        outcome = results.get(check)
        if outcome is not None and not outcome[0]:
            suggestions.extend(fixes)
    return suggestions
def main():
    """
    Run all environment checks and display results.

    Each check's message is printed as it runs, followed by a summary and,
    when anything failed, a list of suggested fixes.

    Returns:
        Process exit code: 0 if every check passed, 1 otherwise
    """
    print("=" * 60)
    print("Biomni Environment Validation")
    print("=" * 60)
    print()

    # Run all checks
    results = {}

    print("Checking Python version...")
    results['python'] = check_python_version()
    print(f" {results['python'][1]}")
    print()

    print("Checking conda environment...")
    results['conda'] = check_conda_env()
    print(f" {results['conda'][1]}")
    print()

    print("Checking dependencies...")
    results['dependencies'] = check_dependencies()
    for msg in results['dependencies'][1]:
        print(f" {msg}")
    print()

    print("Checking API keys...")
    results['api_keys'] = check_api_keys()
    _, key_status = results['api_keys']
    for key, configured in key_status.items():
        # Both branches used to be empty strings, so nothing was shown.
        status = "✓ configured" if configured else "✗ not set"
        print(f" {key}: {status}")
    print()

    print("Checking Biomni data directory...")
    results['data'] = check_data_directory()
    print(f" {results['data'][1]}")
    print()

    print("Checking disk space...")
    results['disk_space'] = check_disk_space()
    print(f" {results['disk_space'][1]}")
    print()

    print("Testing Biomni import...")
    results['biomni_import'] = test_biomni_import()
    print(f" {results['biomni_import'][1]}")
    print()

    # Summary
    print("=" * 60)
    all_passed = all(result[0] for result in results.values())
    if all_passed:
        print("✓ All checks passed! Environment is ready.")
        print()
        print("Quick start:")
        print(" from biomni.agent import A1")
        print(" agent = A1(path='./data', llm='claude-sonnet-4-20250514')")
        print(" agent.go('Your biomedical task')")
    else:
        print("⚠ Some checks failed. See suggestions below:")
        print()
        for suggestion in suggest_fixes(results):
            print(suggestion)
    print("=" * 60)
    return 0 if all_passed else 1
# Script entry point: run the validation and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())