Improved Biomni support

2026-03-28 07:33:45 +08:00 · 2025-10-22 08:38:06 -07:00
parent 71a3c3750f
commit 77822efeed
9 changed files with 2512 additions and 3393 deletions
--- a/scientific-packages/biomni/scripts/generate_report.py
+++ b/scientific-packages/biomni/scripts/generate_report.py
@@ -1,381 +1,370 @@
 #!/usr/bin/env python3
 """
-Enhanced PDF Report Generation for Biomni
+Enhanced PDF report generation for biomni conversation histories.

-This script provides advanced PDF report generation with custom formatting,
-styling, and metadata for Biomni analysis results.
+This script provides additional customization options for biomni reports:
+- Custom styling and branding
+- Formatted code blocks
+- Section organization
+- Metadata inclusion
+- Export format options (PDF, HTML, Markdown)
+
+Usage:
+    python generate_report.py --input conversation.json --output report.pdf
+    python generate_report.py --agent-object agent --output report.pdf --format html
 """

 import argparse
-import sys
+import json
 from pathlib import Path
+from typing import Dict, List, Optional, Any
 from datetime import datetime
-from typing import Optional, Dict, Any


-def generate_markdown_report(
-    title: str,
-    sections: list,
-    metadata: Optional[Dict[str, Any]] = None,
-    output_path: str = "report.md"
+def format_conversation_history(
+    messages: List[Dict[str, Any]],
+    include_metadata: bool = True,
+    include_code: bool = True,
+    include_timestamps: bool = False
 ) -> str:
    """
-    Generate a formatted markdown report.
+    Format conversation history into structured markdown.

    Args:
-        title: Report title
-        sections: List of dicts with 'heading' and 'content' keys
-        metadata: Optional metadata dict (author, date, etc.)
-        output_path: Path to save markdown file
+        messages: List of conversation message dictionaries
+        include_metadata: Include metadata section
+        include_code: Include code blocks
+        include_timestamps: Include message timestamps

    Returns:
-        Path to generated markdown file
+        Formatted markdown string
    """
-    md_content = []
-
-    # Title
-    md_content.append(f"# {title}\n")
-
-    # Metadata
-    if metadata:
-        md_content.append("---\n")
-        for key, value in metadata.items():
-            md_content.append(f"**{key}:** {value}  \n")
-        md_content.append("---\n\n")
-
-    # Sections
-    for section in sections:
-        heading = section.get('heading', 'Section')
-        content = section.get('content', '')
-        level = section.get('level', 2)  # Default to h2
-
-        md_content.append(f"{'#' * level} {heading}\n\n")
-        md_content.append(f"{content}\n\n")
-
-    # Write to file
-    output = Path(output_path)
-    output.write_text('\n'.join(md_content))
-
-    return str(output)
-
-
-def convert_to_pdf_weasyprint(
-    markdown_path: str,
-    output_path: str,
-    css_style: Optional[str] = None
-) -> bool:
-    """
-    Convert markdown to PDF using WeasyPrint.
-
-    Args:
-        markdown_path: Path to markdown file
-        output_path: Path for output PDF
-        css_style: Optional CSS stylesheet path
-
-    Returns:
-        True if successful, False otherwise
-    """
-    try:
-        import markdown
-        from weasyprint import HTML, CSS
-
-        # Read markdown
-        with open(markdown_path, 'r') as f:
-            md_content = f.read()
-
-        # Convert to HTML
-        html_content = markdown.markdown(
-            md_content,
-            extensions=['tables', 'fenced_code', 'codehilite']
-        )
-
-        # Wrap in HTML template
-        html_template = f"""
-        <!DOCTYPE html>
-        <html>
-        <head>
-            <meta charset="utf-8">
-            <title>Biomni Report</title>
-            <style>
-                body {{
-                    font-family: 'Helvetica', 'Arial', sans-serif;
-                    line-height: 1.6;
-                    color: #333;
-                    max-width: 800px;
-                    margin: 40px auto;
-                    padding: 20px;
-                }}
-                h1 {{
-                    color: #2c3e50;
-                    border-bottom: 3px solid #3498db;
-                    padding-bottom: 10px;
-                }}
-                h2 {{
-                    color: #34495e;
-                    margin-top: 30px;
-                    border-bottom: 1px solid #bdc3c7;
-                    padding-bottom: 5px;
-                }}
-                h3 {{
-                    color: #7f8c8d;
-                }}
-                code {{
-                    background-color: #f4f4f4;
-                    padding: 2px 6px;
-                    border-radius: 3px;
-                    font-family: 'Courier New', monospace;
-                }}
-                pre {{
-                    background-color: #f4f4f4;
-                    padding: 15px;
-                    border-radius: 5px;
-                    overflow-x: auto;
-                }}
-                table {{
-                    border-collapse: collapse;
-                    width: 100%;
-                    margin: 20px 0;
-                }}
-                th, td {{
-                    border: 1px solid #ddd;
-                    padding: 12px;
-                    text-align: left;
-                }}
-                th {{
-                    background-color: #3498db;
-                    color: white;
-                }}
-                tr:nth-child(even) {{
-                    background-color: #f9f9f9;
-                }}
-                .metadata {{
-                    background-color: #ecf0f1;
-                    padding: 15px;
-                    border-radius: 5px;
-                    margin: 20px 0;
-                }}
-            </style>
-        </head>
-        <body>
-            {html_content}
-        </body>
-        </html>
-        """
-
-        # Generate PDF
-        pdf = HTML(string=html_template)
-
-        # Add custom CSS if provided
-        stylesheets = []
-        if css_style and Path(css_style).exists():
-            stylesheets.append(CSS(filename=css_style))
-
-        pdf.write_pdf(output_path, stylesheets=stylesheets)
-
-        return True
-
-    except ImportError:
-        print("Error: WeasyPrint not installed. Install with: pip install weasyprint")
-        return False
-    except Exception as e:
-        print(f"Error generating PDF: {e}")
-        return False
-
-
-def convert_to_pdf_pandoc(markdown_path: str, output_path: str) -> bool:
-    """
-    Convert markdown to PDF using Pandoc.
-
-    Args:
-        markdown_path: Path to markdown file
-        output_path: Path for output PDF
-
-    Returns:
-        True if successful, False otherwise
-    """
-    try:
-        import subprocess
-
-        # Check if pandoc is installed
-        result = subprocess.run(
-            ['pandoc', '--version'],
-            capture_output=True,
-            text=True
-        )
-
-        if result.returncode != 0:
-            print("Error: Pandoc not installed")
-            return False
-
-        # Convert with pandoc
-        result = subprocess.run(
-            [
-                'pandoc',
-                markdown_path,
-                '-o', output_path,
-                '--pdf-engine=pdflatex',
-                '-V', 'geometry:margin=1in',
-                '--toc'
-            ],
-            capture_output=True,
-            text=True
-        )
-
-        if result.returncode != 0:
-            print(f"Pandoc error: {result.stderr}")
-            return False
-
-        return True
-
-    except FileNotFoundError:
-        print("Error: Pandoc not found. Install from https://pandoc.org/")
-        return False
-    except Exception as e:
-        print(f"Error: {e}")
-        return False
-
-
-def create_biomni_report(
-    conversation_history: list,
-    output_path: str = "biomni_report.pdf",
-    method: str = "weasyprint"
-) -> bool:
-    """
-    Create a formatted PDF report from Biomni conversation history.
-
-    Args:
-        conversation_history: List of conversation turns
-        output_path: Output PDF path
-        method: Conversion method ('weasyprint' or 'pandoc')
-
-    Returns:
-        True if successful
-    """
-    # Prepare report sections
-    metadata = {
-        'Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-        'Tool': 'Biomni AI Agent',
-        'Report Type': 'Analysis Summary'
-    }
-
    sections = []

-    # Executive Summary
-    sections.append({
-        'heading': 'Executive Summary',
-        'level': 2,
-        'content': 'This report contains the complete analysis workflow executed by the Biomni biomedical AI agent.'
-    })
+    # Header
+    sections.append("# Biomni Analysis Report\n")

-    # Conversation history
-    for i, turn in enumerate(conversation_history, 1):
-        sections.append({
-            'heading': f'Task {i}: {turn.get("task", "Analysis")}',
-            'level': 2,
-            'content': f'**Input:**\n```\n{turn.get("input", "")}\n```\n\n**Output:**\n{turn.get("output", "")}'
-        })
+    # Metadata
+    if include_metadata:
+        sections.append("## Metadata\n")
+        sections.append(f"- **Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        sections.append(f"- **Number of interactions**: {len(messages)}")
+        sections.append("\n---\n")
+
+    # Process messages
+    sections.append("## Analysis\n")
+
+    for i, msg in enumerate(messages, 1):
+        role = msg.get('role', 'unknown')
+        content = msg.get('content', '')
+
+        if role == 'user':
+            sections.append(f"### Task {i // 2 + 1}\n")
+            sections.append(f"**Query:**\n```\n{content}\n```\n")
+
+        elif role == 'assistant':
+            sections.append(f"**Response:**\n")
+
+            # Check if content contains code
+            if include_code and ('```' in content or 'import ' in content):
+                # Attempt to separate text and code
+                parts = content.split('```')
+                for j, part in enumerate(parts):
+                    if j % 2 == 0:
+                        # Text content
+                        if part.strip():
+                            sections.append(f"{part.strip()}\n")
+                    else:
+                        # Code content
+                        # Check if language is specified
+                        lines = part.split('\n', 1)
+                        if len(lines) > 1 and lines[0].strip() in ['python', 'r', 'bash', 'sql']:
+                            lang = lines[0].strip()
+                            code = lines[1]
+                        else:
+                            lang = 'python'  # Default to python
+                            code = part
+
+                        sections.append(f"```{lang}\n{code}\n```\n")
+            else:
+                sections.append(f"{content}\n")
+
+            sections.append("\n---\n")
+
+    return '\n'.join(sections)
+
+
+def markdown_to_html(markdown_content: str, title: str = "Biomni Report") -> str:
+    """
+    Wrap markdown content in a complete, styled HTML document.
+
+    The markdown body is rendered by ``markdown_to_html_simple`` and placed
+    inside a fixed page template (inline CSS, content div, footer).
+
+    Args:
+        markdown_content: Markdown string to render as the page body
+        title: Text for the HTML ``<title>`` element; HTML-escaped before
+            insertion so characters such as ``<`` or ``&`` stay literal
+
+    Returns:
+        HTML document as a string
+    """
+    # Local import: the file's top-level import block does not provide ``html``.
+    import html
+
+    # The title is caller-supplied text (e.g. the --title CLI option), so it
+    # must not be interpolated into markup verbatim.
+    safe_title = html.escape(title)
+
+    # Simple markdown to HTML conversion
+    # For production use, consider using a library like markdown or mistune
+    body_html = markdown_to_html_simple(markdown_content)
+
+    # NOTE: literal CSS braces are doubled because this is an f-string.
+    html_template = f"""
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{safe_title}</title>
+    <style>
+        body {{
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
+            line-height: 1.6;
+            max-width: 900px;
+            margin: 0 auto;
+            padding: 20px;
+            color: #333;
+        }}
+        h1 {{
+            color: #2c3e50;
+            border-bottom: 3px solid #3498db;
+            padding-bottom: 10px;
+        }}
+        h2 {{
+            color: #34495e;
+            margin-top: 30px;
+            border-bottom: 2px solid #95a5a6;
+            padding-bottom: 5px;
+        }}
+        h3 {{
+            color: #555;
+        }}
+        code {{
+            background-color: #f4f4f4;
+            padding: 2px 6px;
+            border-radius: 3px;
+            font-family: 'Monaco', 'Menlo', 'Courier New', monospace;
+        }}
+        pre {{
+            background-color: #f8f8f8;
+            border: 1px solid #ddd;
+            border-radius: 5px;
+            padding: 15px;
+            overflow-x: auto;
+        }}
+        pre code {{
+            background-color: transparent;
+            padding: 0;
+        }}
+        hr {{
+            border: none;
+            border-top: 1px solid #ddd;
+            margin: 30px 0;
+        }}
+        .metadata {{
+            background-color: #ecf0f1;
+            padding: 15px;
+            border-radius: 5px;
+            margin-bottom: 20px;
+        }}
+        .task {{
+            background-color: #e8f4f8;
+            padding: 10px;
+            border-left: 4px solid #3498db;
+            margin: 20px 0;
+        }}
+        .footer {{
+            margin-top: 50px;
+            text-align: center;
+            color: #7f8c8d;
+            font-size: 0.9em;
+        }}
+    </style>
+</head>
+<body>
+    <div class="content">
+        {body_html}
+    </div>
+    <div class="footer">
+        <p>Generated with Biomni | Stanford SNAP Lab</p>
+        <p><a href="https://github.com/snap-stanford/biomni">github.com/snap-stanford/biomni</a></p>
+    </div>
+</body>
+</html>
+"""
+    return html_template
+
+
+def markdown_to_html_simple(md: str) -> str:
+    """
+    Convert a small markdown subset to HTML (basic implementation).
+
+    Supported constructs: fenced code blocks, ``#``/``##``/``###`` headers,
+    ``- `` bullet lists, ``---`` horizontal rules, ``**bold**`` spans and
+    plain paragraphs. Blank lines become ``<br>``.
+
+    All text and code content is HTML-escaped, so characters such as ``<``
+    and ``&`` (e.g. R's ``x <- 1``) appear literally in the rendered page.
+
+    Args:
+        md: Markdown string
+
+    Returns:
+        HTML fragment with one output line per generated element, joined
+        by newlines
+    """
+    # Local imports: the file's top-level import block provides neither module.
+    import html
+    import re
+
+    bold_pattern = re.compile(r'\*\*(.+?)\*\*')
+
+    def render_inline(text: str) -> str:
+        # Escape first so the <strong> tags inserted below are not escaped.
+        escaped = html.escape(text, quote=False)
+        # Convert every complete **...** pair; an unpaired ** stays literal.
+        return bold_pattern.sub(r'<strong>\1</strong>', escaped)
+
+    html_lines = []
+    in_code_block = False
+    in_list = False
+
+    for line in md.split('\n'):
+        # Code fences toggle code-block mode
+        if line.startswith('```'):
+            if in_list:
+                # A fence ends any list that was still open
+                html_lines.append('</ul>')
+                in_list = False
+            if in_code_block:
+                html_lines.append('</code></pre>')
+            else:
+                lang = html.escape(line[3:].strip(), quote=True)
+                html_lines.append(f'<pre><code class="language-{lang}">')
+            in_code_block = not in_code_block
+            continue
+
+        if in_code_block:
+            # Code is emitted verbatim apart from HTML escaping
+            html_lines.append(html.escape(line, quote=False))
+            continue
+
+        # List items
+        if line.startswith('- '):
+            if not in_list:
+                html_lines.append('<ul>')
+                in_list = True
+            html_lines.append(f'<li>{render_inline(line[2:])}</li>')
+            continue
+
+        # Any non-list line (headers included) terminates an open list
+        if in_list:
+            html_lines.append('</ul>')
+            in_list = False
+
+        # Headers
+        if line.startswith('# '):
+            html_lines.append(f'<h1>{render_inline(line[2:])}</h1>')
+        elif line.startswith('## '):
+            html_lines.append(f'<h2>{render_inline(line[3:])}</h2>')
+        elif line.startswith('### '):
+            html_lines.append(f'<h3>{render_inline(line[4:])}</h3>')
+        # Horizontal rule
+        elif line.strip() == '---':
+            html_lines.append('<hr>')
+        # Paragraph (bold spans handled by render_inline)
+        elif line.strip():
+            html_lines.append(f'<p>{render_inline(line)}</p>')
+        else:
+            html_lines.append('<br>')
+
+    # Close anything left open so the fragment is well-formed
+    if in_list:
+        html_lines.append('</ul>')
+    if in_code_block:
+        html_lines.append('</code></pre>')
+
+    return '\n'.join(html_lines)
+
+
+def generate_report(
+    conversation_data: Dict[str, Any],
+    output_path: Path,
+    format: str = 'markdown',
+    title: Optional[str] = None
+):
+    """
+    Generate formatted report from conversation data.
+
+    Args:
+        conversation_data: Conversation history dictionary
+        output_path: Output file path
+        format: Output format ('markdown', 'html', or 'pdf')
+        title: Report title
+    """
+    messages = conversation_data.get('messages', [])
+
+    if not title:
+        title = f"Biomni Analysis - {datetime.now().strftime('%Y-%m-%d')}"

    # Generate markdown
-    md_path = output_path.replace('.pdf', '.md')
-    generate_markdown_report(
-        title="Biomni Analysis Report",
-        sections=sections,
-        metadata=metadata,
-        output_path=md_path
-    )
+    markdown_content = format_conversation_history(messages)
+
+    if format == 'markdown':
+        output_path.write_text(markdown_content)
+        print(f"✓ Markdown report saved to {output_path}")
+
+    elif format == 'html':
+        html_content = markdown_to_html(markdown_content, title)
+        output_path.write_text(html_content)
+        print(f"✓ HTML report saved to {output_path}")
+
+    elif format == 'pdf':
+        # For PDF generation, we'd typically use a library like weasyprint or reportlab
+        # This is a placeholder implementation
+        print("PDF generation requires additional dependencies (weasyprint or reportlab)")
+        print("Falling back to HTML format...")
+
+        html_path = output_path.with_suffix('.html')
+        html_content = markdown_to_html(markdown_content, title)
+        html_path.write_text(html_content)
+
+        print(f"✓ HTML report saved to {html_path}")
+        print("  To convert to PDF:")
+        print(f"    1. Install weasyprint: pip install weasyprint")
+        print(f"    2. Run: weasyprint {html_path} {output_path}")

-    # Convert to PDF
-    if method == 'weasyprint':
-        success = convert_to_pdf_weasyprint(md_path, output_path)
-    elif method == 'pandoc':
-        success = convert_to_pdf_pandoc(md_path, output_path)
    else:
-        print(f"Unknown method: {method}")
-        return False
-
-    if success:
-        print(f"✓ Report generated: {output_path}")
-        print(f"  Markdown: {md_path}")
-    else:
-        print("✗ Failed to generate PDF")
-        print(f"  Markdown available: {md_path}")
-
-    return success
+        raise ValueError(f"Unsupported format: {format}")


 def main():
-    """CLI for report generation."""
+    """Main entry point for CLI usage."""
    parser = argparse.ArgumentParser(
-        description='Generate formatted PDF reports for Biomni analyses'
+        description="Generate enhanced reports from biomni conversation histories"
    )

    parser.add_argument(
-        'input',
-        type=str,
-        help='Input markdown file or conversation history'
+        '--input',
+        type=Path,
+        required=True,
+        help='Input conversation history JSON file'
    )

    parser.add_argument(
-        '-o', '--output',
-        type=str,
-        default='biomni_report.pdf',
-        help='Output PDF path (default: biomni_report.pdf)'
+        '--output',
+        type=Path,
+        required=True,
+        help='Output report file path'
    )

    parser.add_argument(
-        '-m', '--method',
-        type=str,
-        choices=['weasyprint', 'pandoc'],
-        default='weasyprint',
-        help='Conversion method (default: weasyprint)'
+        '--format',
+        choices=['markdown', 'html', 'pdf'],
+        default='markdown',
+        help='Output format (default: markdown)'
    )

    parser.add_argument(
-        '--css',
+        '--title',
        type=str,
-        help='Custom CSS stylesheet path'
+        help='Report title (optional)'
    )

    args = parser.parse_args()

-    # Check if input is markdown or conversation history
-    input_path = Path(args.input)
-
-    if not input_path.exists():
-        print(f"Error: Input file not found: {args.input}")
-        return 1
-
-    # If input is markdown, convert directly
-    if input_path.suffix == '.md':
-        if args.method == 'weasyprint':
-            success = convert_to_pdf_weasyprint(
-                str(input_path),
-                args.output,
-                args.css
-            )
-        else:
-            success = convert_to_pdf_pandoc(str(input_path), args.output)
-
-        return 0 if success else 1
-
-    # Otherwise, assume it's conversation history (JSON)
+    # Load conversation data
    try:
-        import json
-        with open(input_path) as f:
-            history = json.load(f)
-
-        success = create_biomni_report(
-            history,
-            args.output,
-            args.method
-        )
-
-        return 0 if success else 1
-
+        with open(args.input, 'r') as f:
+            conversation_data = json.load(f)
+    except FileNotFoundError:
+        print(f"❌ Input file not found: {args.input}")
+        return 1
    except json.JSONDecodeError:
-        print("Error: Input file is not valid JSON or markdown")
+        print(f"❌ Invalid JSON in input file: {args.input}")
+        return 1
+
+    # Generate report
+    try:
+        generate_report(
+            conversation_data,
+            args.output,
+            format=args.format,
+            title=args.title
+        )
+        return 0
+    except Exception as e:
+        print(f"❌ Error generating report: {e}")
        return 1


-if __name__ == "__main__":
+if __name__ == '__main__':
+    import sys
    sys.exit(main())