Add more scientific skills

2026-03-27 07:09:27 +08:00 · 2025-10-19 14:12:02 -07:00
parent 78d5ac2b56
commit 660c8574d0
210 changed files with 88957 additions and 1 deletions
--- a/scientific-packages/biomni/scripts/generate_report.py
+++ b/scientific-packages/biomni/scripts/generate_report.py
@@ -0,0 +1,381 @@
+#!/usr/bin/env python3
+"""
+Enhanced PDF Report Generation for Biomni
+
+This script provides advanced PDF report generation with custom formatting,
+styling, and metadata for Biomni analysis results.
+"""
+
+import argparse
+import sys
+from pathlib import Path
+from datetime import datetime
+from typing import Optional, Dict, Any
+
+
+def generate_markdown_report(
+    title: str,
+    sections: list,
+    metadata: Optional[Dict[str, Any]] = None,
+    output_path: str = "report.md"
+) -> str:
+    """
+    Generate a formatted markdown report and write it to disk as UTF-8.
+
+    Args:
+        title: Report title, emitted as the top-level heading
+        sections: List of dicts with 'heading' and 'content' keys and an
+            optional 'level' key (heading depth, default 2)
+        metadata: Optional metadata dict (author, date, etc.)
+        output_path: Path to save markdown file
+
+    Returns:
+        Path to generated markdown file
+    """
+    md_content = [f"# {title}\n"]
+
+    # Metadata block, fenced by '---' rules; one bold "key: value" entry
+    # is emitted per metadata item.
+    if metadata:
+        md_content.append("---\n")
+        md_content.extend(
+            f"**{key}:** {value}  \n" for key, value in metadata.items()
+        )
+        md_content.append("---\n\n")
+
+    # Sections
+    for section in sections:
+        heading = section.get('heading', 'Section')
+        content = section.get('content', '')
+        level = section.get('level', 2)  # Default to h2
+
+        md_content.append(f"{'#' * level} {heading}\n\n")
+        md_content.append(f"{content}\n\n")
+
+    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
+    # cannot represent many symbols that appear in analysis output.
+    output = Path(output_path)
+    output.write_text('\n'.join(md_content), encoding='utf-8')
+    return str(output)
+
+
+def convert_to_pdf_weasyprint(
+    markdown_path: str,
+    output_path: str,
+    css_style: Optional[str] = None
+) -> bool:
+    """
+    Convert a UTF-8 markdown file to PDF using WeasyPrint.
+
+    Args:
+        markdown_path: Path to markdown file
+        output_path: Path for output PDF
+        css_style: Optional CSS stylesheet path; a missing file is skipped with a warning
+
+    Returns:
+        True if successful, False otherwise
+    """
+    try:
+        import markdown
+        from weasyprint import HTML, CSS
+
+        # Read markdown as UTF-8, matching the charset declared in the template
+        with open(markdown_path, 'r', encoding='utf-8') as f:
+            md_content = f.read()
+
+        # Convert to HTML
+        html_content = markdown.markdown(
+            md_content,
+            extensions=['tables', 'fenced_code', 'codehilite']
+        )
+
+        # Wrap in HTML template
+        html_template = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <meta charset="utf-8">
+            <title>Biomni Report</title>
+            <style>
+                body {{
+                    font-family: 'Helvetica', 'Arial', sans-serif;
+                    line-height: 1.6;
+                    color: #333;
+                    max-width: 800px;
+                    margin: 40px auto;
+                    padding: 20px;
+                }}
+                h1 {{
+                    color: #2c3e50;
+                    border-bottom: 3px solid #3498db;
+                    padding-bottom: 10px;
+                }}
+                h2 {{
+                    color: #34495e;
+                    margin-top: 30px;
+                    border-bottom: 1px solid #bdc3c7;
+                    padding-bottom: 5px;
+                }}
+                h3 {{
+                    color: #7f8c8d;
+                }}
+                code {{
+                    background-color: #f4f4f4;
+                    padding: 2px 6px;
+                    border-radius: 3px;
+                    font-family: 'Courier New', monospace;
+                }}
+                pre {{
+                    background-color: #f4f4f4;
+                    padding: 15px;
+                    border-radius: 5px;
+                    overflow-x: auto;
+                }}
+                table {{
+                    border-collapse: collapse;
+                    width: 100%;
+                    margin: 20px 0;
+                }}
+                th, td {{
+                    border: 1px solid #ddd;
+                    padding: 12px;
+                    text-align: left;
+                }}
+                th {{
+                    background-color: #3498db;
+                    color: white;
+                }}
+                tr:nth-child(even) {{
+                    background-color: #f9f9f9;
+                }}
+                .metadata {{
+                    background-color: #ecf0f1;
+                    padding: 15px;
+                    border-radius: 5px;
+                    margin: 20px 0;
+                }}
+            </style>
+        </head>
+        <body>
+            {html_content}
+        </body>
+        </html>
+        """
+
+        # Generate PDF
+        pdf = HTML(string=html_template)
+        # Add custom CSS if provided; report a missing file instead of ignoring it
+        stylesheets = []
+        if css_style:
+            if Path(css_style).exists():
+                stylesheets.append(CSS(filename=css_style))
+            else:
+                print(f"Warning: CSS file not found, skipping: {css_style}")
+        pdf.write_pdf(output_path, stylesheets=stylesheets)
+        return True
+
+    except ImportError as e:
+        print(f"Error: {e}. Install with: pip install weasyprint markdown")
+        return False
+    except Exception as e:
+        print(f"Error generating PDF: {e}")
+        return False
+
+
+def convert_to_pdf_pandoc(markdown_path: str, output_path: str) -> bool:
+    """
+    Render a markdown file to PDF by shelling out to Pandoc.
+
+    A `pandoc --version` probe runs first; the conversion then uses the
+    pdflatex engine with 1in margins and a table of contents. Problems are
+    printed and signalled through the return value rather than raised.
+
+    Args:
+        markdown_path: Path to markdown file
+        output_path: Path for output PDF
+
+    Returns:
+        True if successful, False otherwise
+    """
+    try:
+        import subprocess
+
+        # Both invocations capture stdout/stderr as text
+        run_options = {'capture_output': True, 'text': True}
+
+        # Probe for a working pandoc executable
+        probe = subprocess.run(['pandoc', '--version'], **run_options)
+        if probe.returncode != 0:
+            print("Error: Pandoc not installed")
+            return False
+
+        # Assemble the conversion command
+        command = [
+            'pandoc',
+            markdown_path,
+            '-o', output_path,
+            '--pdf-engine=pdflatex',
+            '-V', 'geometry:margin=1in',
+            '--toc',
+        ]
+        conversion = subprocess.run(command, **run_options)
+
+        # A zero exit status is the only success signal
+        if conversion.returncode == 0:
+            return True
+
+        print(f"Pandoc error: {conversion.stderr}")
+        return False
+
+    except FileNotFoundError:
+        print("Error: Pandoc not found. Install from https://pandoc.org/")
+        return False
+    except Exception as err:
+        print(f"Error: {err}")
+        return False
+
+
+def create_biomni_report(
+    conversation_history: list,
+    output_path: str = "biomni_report.pdf",
+    method: str = "weasyprint"
+) -> bool:
+    """
+    Create a formatted PDF report from Biomni conversation history.
+
+    The intermediate markdown file is written beside the PDF and kept on disk.
+
+    Args:
+        conversation_history: List of turn dicts ('task', 'input', 'output' keys)
+        output_path: Output PDF path
+        method: Conversion method ('weasyprint' or 'pandoc')
+
+    Returns:
+        True if successful
+    """
+    metadata = {
+        'Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+        'Tool': 'Biomni AI Agent',
+        'Report Type': 'Analysis Summary'
+    }
+
+    # Executive Summary
+    sections = [{
+        'heading': 'Executive Summary',
+        'level': 2,
+        'content': 'This report contains the complete analysis workflow executed by the Biomni biomedical AI agent.'
+    }]
+
+    # Conversation history
+    for i, turn in enumerate(conversation_history, 1):
+        sections.append({
+            'heading': f'Task {i}: {turn.get("task", "Analysis")}',
+            'level': 2,
+            'content': f'**Input:**\n```\n{turn.get("input", "")}\n```\n\n**Output:**\n{turn.get("output", "")}'
+        })
+
+    # Swap only the final suffix: str.replace('.pdf', '.md') also hits '.pdf' in
+    # directory names and leaves other paths unchanged (PDF would clobber the md).
+    md_path = str(Path(output_path).with_suffix('.md'))
+    if Path(md_path) == Path(output_path):
+        md_path = f"{output_path}.md"
+    generate_markdown_report(
+        title="Biomni Analysis Report",
+        sections=sections,
+        metadata=metadata,
+        output_path=md_path
+    )
+
+    if method == 'weasyprint':
+        success = convert_to_pdf_weasyprint(md_path, output_path)
+    elif method == 'pandoc':
+        success = convert_to_pdf_pandoc(md_path, output_path)
+    else:
+        print(f"Unknown method: {method}")
+        return False
+
+    if success:
+        print(f"✓ Report generated: {output_path}")
+        print(f"  Markdown: {md_path}")
+    else:
+        print("✗ Failed to generate PDF")
+        print(f"  Markdown available: {md_path}")
+    return success
+
+
+def main():
+    """CLI for report generation; returns the process exit code (0 or 1)."""
+    parser = argparse.ArgumentParser(
+        description='Generate formatted PDF reports for Biomni analyses'
+    )
+
+    parser.add_argument(
+        'input',
+        type=str,
+        help='Input markdown file or conversation history'
+    )
+
+    parser.add_argument(
+        '-o', '--output',
+        type=str,
+        default='biomni_report.pdf',
+        help='Output PDF path (default: biomni_report.pdf)'
+    )
+
+    parser.add_argument(
+        '-m', '--method',
+        type=str,
+        choices=['weasyprint', 'pandoc'],
+        default='weasyprint',
+        help='Conversion method (default: weasyprint)'
+    )
+
+    parser.add_argument(
+        '--css',
+        type=str,
+        help='Custom CSS stylesheet path'
+    )
+
+    args = parser.parse_args()
+
+    input_path = Path(args.input)
+
+    if not input_path.exists():
+        print(f"Error: Input file not found: {args.input}")
+        return 1
+
+    # If input is markdown, convert directly (suffix check is case-insensitive)
+    if input_path.suffix.lower() == '.md':
+        if args.method == 'weasyprint':
+            success = convert_to_pdf_weasyprint(
+                str(input_path),
+                args.output,
+                args.css
+            )
+        else:
+            success = convert_to_pdf_pandoc(str(input_path), args.output)
+
+        return 0 if success else 1
+
+    # Otherwise, assume it's conversation history (JSON)
+    import json
+
+    try:
+        # JSON text is UTF-8; do not depend on the platform's default encoding
+        with open(input_path, encoding='utf-8') as f:
+            history = json.load(f)
+    except (json.JSONDecodeError, UnicodeDecodeError):
+        print("Error: Input file is not valid JSON or markdown")
+        return 1
+
+    # create_biomni_report() calls .get() on each turn, so reject other shapes
+    turns_ok = isinstance(history, list) and all(isinstance(t, dict) for t in history)
+    if not turns_ok:
+        print("Error: Conversation history must be a JSON list of objects")
+        return 1
+
+    return 0 if create_biomni_report(history, args.output, args.method) else 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s return value becomes the exit status
--- a/scientific-packages/biomni/scripts/setup_environment.py
+++ b/scientific-packages/biomni/scripts/setup_environment.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+"""
+Biomni Environment Setup and Validation Script
+
+This script helps users set up and validate their Biomni environment,
+including checking dependencies, API keys, and data availability.
+"""
+
+import os
+import sys
+import subprocess
+from pathlib import Path
+from typing import Dict, List, Tuple
+
+
+def check_python_version() -> Tuple[bool, str]:
+    """Report whether the running interpreter is Python 3 at minor version 8+."""
+    major, minor, micro = sys.version_info[:3]
+    supported = major == 3 and minor >= 8
+    if not supported:
+        return False, f"Python {major}.{minor} - requires Python 3.8+"
+    return True, f"Python {major}.{minor}.{micro} ✓"
+
+
+def check_conda_env() -> Tuple[bool, str]:
+    """Report whether CONDA_DEFAULT_ENV names the biomni_e1 environment."""
+    active_env = os.environ.get('CONDA_DEFAULT_ENV')
+    in_biomni_env = active_env == 'biomni_e1'
+    if in_biomni_env:
+        return True, f"Conda environment: {active_env} ✓"
+    return False, f"Not in biomni_e1 environment (current: {active_env})"
+
+
+def check_package_installed(package: str) -> bool:
+    """Return True if *package* can be located, without executing its code."""
+    import importlib.util  # function-scope import keeps the block self-contained
+    try:  # find_spec only locates the module, so import-time errors cannot escape
+        return importlib.util.find_spec(package) is not None
+    except (ImportError, ValueError):  # missing parent package / invalid name
+        return False
+
+
+def check_dependencies() -> Tuple[bool, List[str]]:
+    """Check required/optional packages; return (required_ok, report lines)."""
+    def missing_from(names: List[str]) -> List[str]:
+        return [name for name in names if not check_package_installed(name)]
+
+    missing_required = missing_from(['biomni'])
+    missing_optional = missing_from(['weasyprint', 'markdown2pdf'])
+
+    if missing_required:
+        messages = [
+            f"Missing required packages: {', '.join(missing_required)}",
+            "Install with: pip install biomni --upgrade",
+        ]
+    else:
+        messages = ["Required packages: ✓"]
+
+    if missing_optional:
+        messages.append(f"Missing optional packages: {', '.join(missing_optional)}")
+        messages.append("For PDF reports, install: pip install weasyprint")
+
+    # Only required packages decide the overall result
+    return not missing_required, messages
+
+
+def check_api_keys() -> Tuple[bool, Dict[str, bool]]:
+    """Report which API key environment variables hold a non-empty value.
+
+    Returns:
+        (has_any, configured): configured maps each variable name to whether
+        it is set; has_any is True when at least one is.
+    """
+    key_names = (
+        'ANTHROPIC_API_KEY', 'OPENAI_API_KEY', 'GEMINI_API_KEY', 'GROQ_API_KEY'
+    )
+    configured = {name: bool(os.environ.get(name)) for name in key_names}
+
+    return any(configured.values()), configured
+
+
+def check_data_directory(data_path: str = './data') -> Tuple[bool, str]:
+    """Check that the Biomni data directory exists and holds at least 1 GB.
+
+    Returns:
+        (ok, message) describing the directory state.
+    """
+    root = Path(data_path)
+    if not root.exists():
+        return False, f"Data directory not found at {data_path}"
+
+    # No top-level entries means nothing has been downloaded yet
+    if next(root.glob('*'), None) is None:
+        return False, "Data directory exists but is empty. Run agent once to download."
+
+    # Rough size check (should be ~11GB): sum every regular file, recursively
+    file_sizes = [entry.stat().st_size for entry in root.rglob('*') if entry.is_file()]
+    size_gb = sum(file_sizes) / (1024**3)
+    if size_gb >= 1:
+        return True, f"Data directory: {data_path} ({size_gb:.1f} GB) ✓"
+    return False, f"Data directory exists but seems incomplete ({size_gb:.1f} GB)"
+
+
+def check_disk_space(required_gb: float = 20) -> Tuple[bool, str]:
+    """Compare free space on the current directory's filesystem to a minimum."""
+    try:
+        import shutil
+
+        # Free bytes -> GiB for the filesystem holding the working directory
+        free_gb = shutil.disk_usage('.').free / (1024**3)
+        enough = free_gb >= required_gb
+        if not enough:
+            return False, f"Low disk space: {free_gb:.1f} GB (need {required_gb} GB)"
+        return True, f"Disk space: {free_gb:.1f} GB available ✓"
+    except Exception as err:
+        return False, f"Could not check disk space: {err}"
+
+
+def test_biomni_import() -> Tuple[bool, str]:
+    """Try importing A1 from biomni.agent and default_config from biomni.config."""
+    try:
+        from biomni.agent import A1
+        from biomni.config import default_config
+    except ImportError as import_err:
+        return False, f"Cannot import Biomni: {import_err}"
+    except Exception as other_err:
+        return False, f"Biomni import error: {other_err}"
+    return True, "Biomni import successful ✓"
+
+
+def suggest_fixes(results: Dict[str, Tuple[bool, object]]) -> List[str]:
+    """Generate suggestions for fixing failed checks.
+
+    Args:
+        results: Check name -> tuple whose first item is the pass flag; a
+            check absent from the dict is treated as not having failed.
+
+    Returns:
+        Suggestion lines in a fixed order; empty when nothing failed.
+    """
+    fixes = (
+        ('python', ["➜ Upgrade Python to 3.8 or higher"]),
+        ('conda', ["➜ Activate biomni environment: conda activate biomni_e1"]),
+        ('dependencies', ["➜ Install Biomni: pip install biomni --upgrade"]),
+        ('api_keys', ["➜ Set API key: export ANTHROPIC_API_KEY='your-key'",
+                      "   Or create .env file with API keys"]),
+        ('data', ["➜ Data will auto-download on first agent.go() call"]),
+        ('disk_space', ["➜ Free up disk space (need ~20GB total)"]),
+        ('biomni_import', ["➜ Resolve the Biomni import error reported above"]),
+    )
+
+    # Flatten the lines of every check that is present and failed
+    failed = (lines for check, lines in fixes if check in results and not results[check][0])
+    return [line for lines in failed for line in lines]
+
+
+def main():
+    """Run every environment check, print a report, and return an exit code.
+
+    Returns:
+        0 when all checks pass, 1 otherwise.
+    """
+    banner = "=" * 60
+    print(banner)
+    print("Biomni Environment Validation")
+    print(banner)
+    print()
+
+    # Printers for the three result shapes the checks return
+
+    def show_message(result):
+        """Print the single status message of a (passed, message) result."""
+        print(f"  {result[1]}")
+
+    def show_lines(result):
+        """Print each line of a (passed, messages) result."""
+        for line in result[1]:
+            print(f"  {line}")
+
+    def show_key_status(result):
+        """Print a check mark or cross for every API key variable."""
+        for name, is_set in result[1].items():
+            mark = "✓" if is_set else "✗"
+            print(f"  {name}: {mark}")
+
+    # (announcement, results key, check function, printer), in run order
+    checks = (
+        ("Checking Python version...", 'python', check_python_version, show_message),
+        ("Checking conda environment...", 'conda', check_conda_env, show_message),
+        ("Checking dependencies...", 'dependencies', check_dependencies, show_lines),
+        ("Checking API keys...", 'api_keys', check_api_keys, show_key_status),
+        ("Checking Biomni data directory...", 'data', check_data_directory, show_message),
+        ("Checking disk space...", 'disk_space', check_disk_space, show_message),
+        ("Testing Biomni import...", 'biomni_import', test_biomni_import, show_message),
+    )
+
+    # Run all checks
+    results = {}
+    for announcement, key, check, show in checks:
+        print(announcement)
+        results[key] = check()
+        show(results[key])
+        print()
+
+    # Summary
+    print(banner)
+    all_passed = all(result[0] for result in results.values())
+
+    if not all_passed:
+        # Failed: list the remediation hints
+        print("⚠ Some checks failed. See suggestions below:")
+        print()
+        for suggestion in suggest_fixes(results):
+            print(suggestion)
+    else:
+        # Passed: show a minimal usage example
+        print("✓ All checks passed! Environment is ready.")
+        print()
+        print("Quick start:")
+        print("  from biomni.agent import A1")
+        print("  agent = A1(path='./data', llm='claude-sonnet-4-20250514')")
+        print("  agent.go('Your biomedical task')")
+
+    print(banner)
+
+    return 0 if all_passed else 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s return value becomes the exit status