Improved Biomni support

This commit is contained in:
Timothy Kassis
2025-10-22 08:38:06 -07:00
parent 71a3c3750f
commit 77822efeed
9 changed files with 2512 additions and 3393 deletions

649
scientific-packages/biomni/scripts/generate_report.py Normal file → Executable file
View File

@@ -1,381 +1,370 @@
#!/usr/bin/env python3
"""
Enhanced PDF Report Generation for Biomni
Enhanced PDF report generation for biomni conversation histories.
This script provides advanced PDF report generation with custom formatting,
styling, and metadata for Biomni analysis results.
This script provides additional customization options for biomni reports:
- Custom styling and branding
- Formatted code blocks
- Section organization
- Metadata inclusion
- Export format options (PDF, HTML, Markdown)
Usage:
python generate_report.py --input conversation.json --output report.pdf
python generate_report.py --agent-object agent --output report.pdf --format html
"""
import argparse
import sys
import json
from pathlib import Path
from typing import Dict, List, Optional, Any
from datetime import datetime
from typing import Optional, Dict, Any
def generate_markdown_report(
title: str,
sections: list,
metadata: Optional[Dict[str, Any]] = None,
output_path: str = "report.md"
def format_conversation_history(
messages: List[Dict[str, Any]],
include_metadata: bool = True,
include_code: bool = True,
include_timestamps: bool = False
) -> str:
"""
Generate a formatted markdown report.
Format conversation history into structured markdown.
Args:
title: Report title
sections: List of dicts with 'heading' and 'content' keys
metadata: Optional metadata dict (author, date, etc.)
output_path: Path to save markdown file
messages: List of conversation message dictionaries
include_metadata: Include metadata section
include_code: Include code blocks
include_timestamps: Include message timestamps
Returns:
Path to generated markdown file
Formatted markdown string
"""
md_content = []
# Title
md_content.append(f"# {title}\n")
# Metadata
if metadata:
md_content.append("---\n")
for key, value in metadata.items():
md_content.append(f"**{key}:** {value} \n")
md_content.append("---\n\n")
# Sections
for section in sections:
heading = section.get('heading', 'Section')
content = section.get('content', '')
level = section.get('level', 2) # Default to h2
md_content.append(f"{'#' * level} {heading}\n\n")
md_content.append(f"{content}\n\n")
# Write to file
output = Path(output_path)
output.write_text('\n'.join(md_content))
return str(output)
def convert_to_pdf_weasyprint(
markdown_path: str,
output_path: str,
css_style: Optional[str] = None
) -> bool:
"""
Convert markdown to PDF using WeasyPrint.
Args:
markdown_path: Path to markdown file
output_path: Path for output PDF
css_style: Optional CSS stylesheet path
Returns:
True if successful, False otherwise
"""
try:
import markdown
from weasyprint import HTML, CSS
# Read markdown
with open(markdown_path, 'r') as f:
md_content = f.read()
# Convert to HTML
html_content = markdown.markdown(
md_content,
extensions=['tables', 'fenced_code', 'codehilite']
)
# Wrap in HTML template
html_template = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Biomni Report</title>
<style>
body {{
font-family: 'Helvetica', 'Arial', sans-serif;
line-height: 1.6;
color: #333;
max-width: 800px;
margin: 40px auto;
padding: 20px;
}}
h1 {{
color: #2c3e50;
border-bottom: 3px solid #3498db;
padding-bottom: 10px;
}}
h2 {{
color: #34495e;
margin-top: 30px;
border-bottom: 1px solid #bdc3c7;
padding-bottom: 5px;
}}
h3 {{
color: #7f8c8d;
}}
code {{
background-color: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Courier New', monospace;
}}
pre {{
background-color: #f4f4f4;
padding: 15px;
border-radius: 5px;
overflow-x: auto;
}}
table {{
border-collapse: collapse;
width: 100%;
margin: 20px 0;
}}
th, td {{
border: 1px solid #ddd;
padding: 12px;
text-align: left;
}}
th {{
background-color: #3498db;
color: white;
}}
tr:nth-child(even) {{
background-color: #f9f9f9;
}}
.metadata {{
background-color: #ecf0f1;
padding: 15px;
border-radius: 5px;
margin: 20px 0;
}}
</style>
</head>
<body>
{html_content}
</body>
</html>
"""
# Generate PDF
pdf = HTML(string=html_template)
# Add custom CSS if provided
stylesheets = []
if css_style and Path(css_style).exists():
stylesheets.append(CSS(filename=css_style))
pdf.write_pdf(output_path, stylesheets=stylesheets)
return True
except ImportError:
print("Error: WeasyPrint not installed. Install with: pip install weasyprint")
return False
except Exception as e:
print(f"Error generating PDF: {e}")
return False
def convert_to_pdf_pandoc(markdown_path: str, output_path: str) -> bool:
"""
Convert markdown to PDF using Pandoc.
Args:
markdown_path: Path to markdown file
output_path: Path for output PDF
Returns:
True if successful, False otherwise
"""
try:
import subprocess
# Check if pandoc is installed
result = subprocess.run(
['pandoc', '--version'],
capture_output=True,
text=True
)
if result.returncode != 0:
print("Error: Pandoc not installed")
return False
# Convert with pandoc
result = subprocess.run(
[
'pandoc',
markdown_path,
'-o', output_path,
'--pdf-engine=pdflatex',
'-V', 'geometry:margin=1in',
'--toc'
],
capture_output=True,
text=True
)
if result.returncode != 0:
print(f"Pandoc error: {result.stderr}")
return False
return True
except FileNotFoundError:
print("Error: Pandoc not found. Install from https://pandoc.org/")
return False
except Exception as e:
print(f"Error: {e}")
return False
def create_biomni_report(
conversation_history: list,
output_path: str = "biomni_report.pdf",
method: str = "weasyprint"
) -> bool:
"""
Create a formatted PDF report from Biomni conversation history.
Args:
conversation_history: List of conversation turns
output_path: Output PDF path
method: Conversion method ('weasyprint' or 'pandoc')
Returns:
True if successful
"""
# Prepare report sections
metadata = {
'Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
'Tool': 'Biomni AI Agent',
'Report Type': 'Analysis Summary'
}
sections = []
# Executive Summary
sections.append({
'heading': 'Executive Summary',
'level': 2,
'content': 'This report contains the complete analysis workflow executed by the Biomni biomedical AI agent.'
})
# Header
sections.append("# Biomni Analysis Report\n")
# Conversation history
for i, turn in enumerate(conversation_history, 1):
sections.append({
'heading': f'Task {i}: {turn.get("task", "Analysis")}',
'level': 2,
'content': f'**Input:**\n```\n{turn.get("input", "")}\n```\n\n**Output:**\n{turn.get("output", "")}'
})
# Metadata
if include_metadata:
sections.append("## Metadata\n")
sections.append(f"- **Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
sections.append(f"- **Number of interactions**: {len(messages)}")
sections.append("\n---\n")
# Process messages
sections.append("## Analysis\n")
for i, msg in enumerate(messages, 1):
role = msg.get('role', 'unknown')
content = msg.get('content', '')
if role == 'user':
sections.append(f"### Task {i // 2 + 1}\n")
sections.append(f"**Query:**\n```\n{content}\n```\n")
elif role == 'assistant':
sections.append(f"**Response:**\n")
# Check if content contains code
if include_code and ('```' in content or 'import ' in content):
# Attempt to separate text and code
parts = content.split('```')
for j, part in enumerate(parts):
if j % 2 == 0:
# Text content
if part.strip():
sections.append(f"{part.strip()}\n")
else:
# Code content
# Check if language is specified
lines = part.split('\n', 1)
if len(lines) > 1 and lines[0].strip() in ['python', 'r', 'bash', 'sql']:
lang = lines[0].strip()
code = lines[1]
else:
lang = 'python' # Default to python
code = part
sections.append(f"```{lang}\n{code}\n```\n")
else:
sections.append(f"{content}\n")
sections.append("\n---\n")
return '\n'.join(sections)
def markdown_to_html(markdown_content: str, title: str = "Biomni Report") -> str:
"""
Convert markdown to styled HTML.
Args:
markdown_content: Markdown string
title: HTML page title
Returns:
HTML string
"""
# Simple markdown to HTML conversion
# For production use, consider using a library like markdown or mistune
html_template = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
line-height: 1.6;
max-width: 900px;
margin: 0 auto;
padding: 20px;
color: #333;
}}
h1 {{
color: #2c3e50;
border-bottom: 3px solid #3498db;
padding-bottom: 10px;
}}
h2 {{
color: #34495e;
margin-top: 30px;
border-bottom: 2px solid #95a5a6;
padding-bottom: 5px;
}}
h3 {{
color: #555;
}}
code {{
background-color: #f4f4f4;
padding: 2px 6px;
border-radius: 3px;
font-family: 'Monaco', 'Menlo', 'Courier New', monospace;
}}
pre {{
background-color: #f8f8f8;
border: 1px solid #ddd;
border-radius: 5px;
padding: 15px;
overflow-x: auto;
}}
pre code {{
background-color: transparent;
padding: 0;
}}
hr {{
border: none;
border-top: 1px solid #ddd;
margin: 30px 0;
}}
.metadata {{
background-color: #ecf0f1;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}}
.task {{
background-color: #e8f4f8;
padding: 10px;
border-left: 4px solid #3498db;
margin: 20px 0;
}}
.footer {{
margin-top: 50px;
text-align: center;
color: #7f8c8d;
font-size: 0.9em;
}}
</style>
</head>
<body>
<div class="content">
{markdown_to_html_simple(markdown_content)}
</div>
<div class="footer">
<p>Generated with Biomni | Stanford SNAP Lab</p>
<p><a href="https://github.com/snap-stanford/biomni">github.com/snap-stanford/biomni</a></p>
</div>
</body>
</html>
"""
return html_template
def markdown_to_html_simple(md: str) -> str:
"""Simple markdown to HTML converter (basic implementation)."""
lines = md.split('\n')
html_lines = []
in_code_block = False
in_list = False
for line in lines:
# Code blocks
if line.startswith('```'):
if in_code_block:
html_lines.append('</code></pre>')
in_code_block = False
else:
lang = line[3:].strip()
html_lines.append(f'<pre><code class="language-{lang}">')
in_code_block = True
continue
if in_code_block:
html_lines.append(line)
continue
# Headers
if line.startswith('# '):
html_lines.append(f'<h1>{line[2:]}</h1>')
elif line.startswith('## '):
html_lines.append(f'<h2>{line[3:]}</h2>')
elif line.startswith('### '):
html_lines.append(f'<h3>{line[4:]}</h3>')
# Lists
elif line.startswith('- '):
if not in_list:
html_lines.append('<ul>')
in_list = True
html_lines.append(f'<li>{line[2:]}</li>')
else:
if in_list:
html_lines.append('</ul>')
in_list = False
# Horizontal rule
if line.strip() == '---':
html_lines.append('<hr>')
# Bold
elif '**' in line:
line = line.replace('**', '<strong>', 1).replace('**', '</strong>', 1)
html_lines.append(f'<p>{line}</p>')
# Regular paragraph
elif line.strip():
html_lines.append(f'<p>{line}</p>')
else:
html_lines.append('<br>')
if in_list:
html_lines.append('</ul>')
return '\n'.join(html_lines)
def generate_report(
conversation_data: Dict[str, Any],
output_path: Path,
format: str = 'markdown',
title: Optional[str] = None
):
"""
Generate formatted report from conversation data.
Args:
conversation_data: Conversation history dictionary
output_path: Output file path
format: Output format ('markdown', 'html', or 'pdf')
title: Report title
"""
messages = conversation_data.get('messages', [])
if not title:
title = f"Biomni Analysis - {datetime.now().strftime('%Y-%m-%d')}"
# Generate markdown
md_path = output_path.replace('.pdf', '.md')
generate_markdown_report(
title="Biomni Analysis Report",
sections=sections,
metadata=metadata,
output_path=md_path
)
markdown_content = format_conversation_history(messages)
if format == 'markdown':
output_path.write_text(markdown_content)
print(f"✓ Markdown report saved to {output_path}")
elif format == 'html':
html_content = markdown_to_html(markdown_content, title)
output_path.write_text(html_content)
print(f"✓ HTML report saved to {output_path}")
elif format == 'pdf':
# For PDF generation, we'd typically use a library like weasyprint or reportlab
# This is a placeholder implementation
print("PDF generation requires additional dependencies (weasyprint or reportlab)")
print("Falling back to HTML format...")
html_path = output_path.with_suffix('.html')
html_content = markdown_to_html(markdown_content, title)
html_path.write_text(html_content)
print(f"✓ HTML report saved to {html_path}")
print(" To convert to PDF:")
print(f" 1. Install weasyprint: pip install weasyprint")
print(f" 2. Run: weasyprint {html_path} {output_path}")
# Convert to PDF
if method == 'weasyprint':
success = convert_to_pdf_weasyprint(md_path, output_path)
elif method == 'pandoc':
success = convert_to_pdf_pandoc(md_path, output_path)
else:
print(f"Unknown method: {method}")
return False
if success:
print(f"✓ Report generated: {output_path}")
print(f" Markdown: {md_path}")
else:
print("✗ Failed to generate PDF")
print(f" Markdown available: {md_path}")
return success
raise ValueError(f"Unsupported format: {format}")
def main():
"""CLI for report generation."""
"""Main entry point for CLI usage."""
parser = argparse.ArgumentParser(
description='Generate formatted PDF reports for Biomni analyses'
description="Generate enhanced reports from biomni conversation histories"
)
parser.add_argument(
'input',
type=str,
help='Input markdown file or conversation history'
'--input',
type=Path,
required=True,
help='Input conversation history JSON file'
)
parser.add_argument(
'-o', '--output',
type=str,
default='biomni_report.pdf',
help='Output PDF path (default: biomni_report.pdf)'
'--output',
type=Path,
required=True,
help='Output report file path'
)
parser.add_argument(
'-m', '--method',
type=str,
choices=['weasyprint', 'pandoc'],
default='weasyprint',
help='Conversion method (default: weasyprint)'
'--format',
choices=['markdown', 'html', 'pdf'],
default='markdown',
help='Output format (default: markdown)'
)
parser.add_argument(
'--css',
'--title',
type=str,
help='Custom CSS stylesheet path'
help='Report title (optional)'
)
args = parser.parse_args()
# Check if input is markdown or conversation history
input_path = Path(args.input)
if not input_path.exists():
print(f"Error: Input file not found: {args.input}")
return 1
# If input is markdown, convert directly
if input_path.suffix == '.md':
if args.method == 'weasyprint':
success = convert_to_pdf_weasyprint(
str(input_path),
args.output,
args.css
)
else:
success = convert_to_pdf_pandoc(str(input_path), args.output)
return 0 if success else 1
# Otherwise, assume it's conversation history (JSON)
# Load conversation data
try:
import json
with open(input_path) as f:
history = json.load(f)
success = create_biomni_report(
history,
args.output,
args.method
)
return 0 if success else 1
with open(args.input, 'r') as f:
conversation_data = json.load(f)
except FileNotFoundError:
print(f"❌ Input file not found: {args.input}")
return 1
except json.JSONDecodeError:
print("Error: Input file is not valid JSON or markdown")
print(f"❌ Invalid JSON in input file: {args.input}")
return 1
# Generate report
try:
generate_report(
conversation_data,
args.output,
format=args.format,
title=args.title
)
return 0
except Exception as e:
print(f"❌ Error generating report: {e}")
return 1
if __name__ == "__main__":
if __name__ == '__main__':
import sys
sys.exit(main())