mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-28 07:33:45 +08:00
Improved Biomni support
This commit is contained in:
649
scientific-packages/biomni/scripts/generate_report.py
Normal file → Executable file
649
scientific-packages/biomni/scripts/generate_report.py
Normal file → Executable file
@@ -1,381 +1,370 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Enhanced PDF Report Generation for Biomni
|
||||
Enhanced PDF report generation for biomni conversation histories.
|
||||
|
||||
This script provides advanced PDF report generation with custom formatting,
|
||||
styling, and metadata for Biomni analysis results.
|
||||
This script provides additional customization options for biomni reports:
|
||||
- Custom styling and branding
|
||||
- Formatted code blocks
|
||||
- Section organization
|
||||
- Metadata inclusion
|
||||
- Export format options (PDF, HTML, Markdown)
|
||||
|
||||
Usage:
|
||||
python generate_report.py --input conversation.json --output report.pdf
|
||||
python generate_report.py --input conversation.json --output report.html --format html
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
|
||||
def generate_markdown_report(
    title: str,
    sections: list,
    metadata: Optional[Dict[str, Any]] = None,
    output_path: str = "report.md"
) -> str:
    """
    Generate a formatted markdown report and write it to disk.

    Args:
        title: Report title, rendered as the top-level heading.
        sections: List of dicts with 'heading' and 'content' keys and an
            optional 'level' key (heading depth, defaults to 2).
        metadata: Optional metadata dict (author, date, etc.) rendered as
            bold key/value lines between two horizontal rules.
        output_path: Path to save the markdown file.

    Returns:
        Path to the generated markdown file, as a string.
    """
    md_content = [f"# {title}\n"]

    # Metadata
    if metadata:
        md_content.append("---\n")
        md_content.extend(
            f"**{key}:** {value} \n" for key, value in metadata.items()
        )
        md_content.append("---\n\n")

    # Sections
    for section in sections:
        heading = section.get('heading', 'Section')
        content = section.get('content', '')
        # Markdown only defines heading levels 1-6; clamp so that a stray
        # value (0, negative, 7+) still produces a real heading.
        level = min(max(int(section.get('level', 2)), 1), 6)

        md_content.append(f"{'#' * level} {heading}\n\n")
        md_content.append(f"{content}\n\n")

    # Write as UTF-8 explicitly: the locale default encoding can fail on
    # non-ASCII report text on some platforms.
    output = Path(output_path)
    output.write_text('\n'.join(md_content), encoding='utf-8')

    return str(output)
|
||||
|
||||
|
||||
def convert_to_pdf_weasyprint(
    markdown_path: str,
    output_path: str,
    css_style: Optional[str] = None
) -> bool:
    """
    Convert a markdown file to PDF using the markdown and WeasyPrint packages.

    Args:
        markdown_path: Path to the markdown file (read as UTF-8).
        output_path: Path for the output PDF.
        css_style: Optional path to an extra CSS stylesheet. A path that does
            not exist is reported and skipped; the built-in styling still
            applies.

    Returns:
        True if the PDF was written, False otherwise. Errors are printed
        rather than raised.
    """
    try:
        import markdown
        from weasyprint import HTML, CSS

        # The generated page declares charset utf-8, so read the source the
        # same way instead of relying on the locale default.
        md_content = Path(markdown_path).read_text(encoding='utf-8')

        # Convert to HTML
        html_content = markdown.markdown(
            md_content,
            extensions=['tables', 'fenced_code', 'codehilite']
        )

        # Wrap in HTML template
        html_template = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Biomni Report</title>
    <style>
        body {{
            font-family: 'Helvetica', 'Arial', sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 800px;
            margin: 40px auto;
            padding: 20px;
        }}
        h1 {{
            color: #2c3e50;
            border-bottom: 3px solid #3498db;
            padding-bottom: 10px;
        }}
        h2 {{
            color: #34495e;
            margin-top: 30px;
            border-bottom: 1px solid #bdc3c7;
            padding-bottom: 5px;
        }}
        h3 {{
            color: #7f8c8d;
        }}
        code {{
            background-color: #f4f4f4;
            padding: 2px 6px;
            border-radius: 3px;
            font-family: 'Courier New', monospace;
        }}
        pre {{
            background-color: #f4f4f4;
            padding: 15px;
            border-radius: 5px;
            overflow-x: auto;
        }}
        table {{
            border-collapse: collapse;
            width: 100%;
            margin: 20px 0;
        }}
        th, td {{
            border: 1px solid #ddd;
            padding: 12px;
            text-align: left;
        }}
        th {{
            background-color: #3498db;
            color: white;
        }}
        tr:nth-child(even) {{
            background-color: #f9f9f9;
        }}
        .metadata {{
            background-color: #ecf0f1;
            padding: 15px;
            border-radius: 5px;
            margin: 20px 0;
        }}
    </style>
</head>
<body>
    {html_content}
</body>
</html>
"""

        # Generate PDF
        pdf = HTML(string=html_template)

        # Add custom CSS if provided
        stylesheets = []
        if css_style:
            if Path(css_style).exists():
                stylesheets.append(CSS(filename=css_style))
            else:
                # Keep going with the default styling, but say so instead of
                # silently dropping the user's stylesheet.
                print(f"Warning: CSS stylesheet not found, ignoring: {css_style}")

        pdf.write_pdf(output_path, stylesheets=stylesheets)

        return True

    except ImportError as e:
        # Either dependency may be the missing one; name it when Python
        # tells us which module failed to import.
        missing = e.name or "markdown/weasyprint"
        print(
            f"Error: required package not installed ({missing}). "
            "Install with: pip install markdown weasyprint"
        )
        return False
    except Exception as e:
        # Top-level boundary: callers rely on a bool result, not exceptions.
        print(f"Error generating PDF: {e}")
        return False
|
||||
|
||||
|
||||
def convert_to_pdf_pandoc(markdown_path: str, output_path: str) -> bool:
    """
    Convert a markdown file to PDF by invoking the pandoc executable.

    Pandoc is run with the pdflatex PDF engine, 1in margins and a table of
    contents.

    Args:
        markdown_path: Path to the markdown file.
        output_path: Path for the output PDF.

    Returns:
        True if pandoc exited successfully, False otherwise. Errors are
        printed rather than raised.
    """
    import shutil
    import subprocess

    # Look pandoc up on PATH instead of spawning `pandoc --version` just to
    # find out whether it exists.
    if shutil.which('pandoc') is None:
        print("Error: Pandoc not found. Install from https://pandoc.org/")
        return False

    # Report a missing input directly rather than as raw pandoc stderr.
    if not Path(markdown_path).is_file():
        print(f"Error: Input file not found: {markdown_path}")
        return False

    command = [
        'pandoc',
        markdown_path,
        '-o', output_path,
        '--pdf-engine=pdflatex',
        '-V', 'geometry:margin=1in',
        '--toc'
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError:
        # pandoc vanished between the PATH lookup and the call.
        print("Error: Pandoc not found. Install from https://pandoc.org/")
        return False
    except Exception as e:
        # Top-level boundary: callers rely on a bool result, not exceptions.
        print(f"Error: {e}")
        return False

    if result.returncode != 0:
        print(f"Pandoc error: {result.stderr}")
        return False

    return True
|
||||
|
||||
|
||||
def create_biomni_report(
    conversation_history: list,
    output_path: str = "biomni_report.pdf",
    method: str = "weasyprint"
) -> bool:
    """
    Create a formatted PDF report from Biomni conversation history.

    A markdown file is written next to the PDF (same name, '.md' suffix) and
    then converted with the selected backend.

    Args:
        conversation_history: List of conversation turns; each turn is read
            through its 'task', 'input' and 'output' keys.
        output_path: Output PDF path.
        method: Conversion method ('weasyprint' or 'pandoc').

    Returns:
        True if the PDF was generated, False otherwise.
    """
    # Prepare report sections
    metadata = {
        'Date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'Tool': 'Biomni AI Agent',
        'Report Type': 'Analysis Summary'
    }

    sections = [{
        'heading': 'Executive Summary',
        'level': 2,
        'content': 'This report contains the complete analysis workflow executed by the Biomni biomedical AI agent.'
    }]

    # Conversation history
    for i, turn in enumerate(conversation_history, 1):
        sections.append({
            'heading': f'Task {i}: {turn.get("task", "Analysis")}',
            'level': 2,
            'content': f'**Input:**\n```\n{turn.get("input", "")}\n```\n\n**Output:**\n{turn.get("output", "")}'
        })

    # Generate markdown. Swap only the final suffix: a plain string replace
    # would also rewrite '.pdf' inside directory names and fails outright
    # when a Path object is passed.
    output_path = str(output_path)
    md_path = str(Path(output_path).with_suffix('.md'))
    generate_markdown_report(
        title="Biomni Analysis Report",
        sections=sections,
        metadata=metadata,
        output_path=md_path
    )

    # Convert to PDF
    if method == 'weasyprint':
        success = convert_to_pdf_weasyprint(md_path, output_path)
    elif method == 'pandoc':
        success = convert_to_pdf_pandoc(md_path, output_path)
    else:
        print(f"Unknown method: {method}")
        return False

    if success:
        print(f"✓ Report generated: {output_path}")
        print(f" Markdown: {md_path}")
    else:
        print("✗ Failed to generate PDF")
        print(f" Markdown available: {md_path}")

    return success


def _render_fenced_content(content: str, include_code: bool) -> List[str]:
    """Split text on ``` fences into markdown chunks, optionally dropping code."""
    import re

    rendered = []
    for index, part in enumerate(content.split('```')):
        if index % 2 == 0:
            # Even chunks sit outside any fence: plain text.
            text = part.strip()
            if text:
                rendered.append(f"{text}\n")
            continue

        if not include_code:
            continue

        # Odd chunks are fence bodies; the first line may be a language tag.
        first_line, newline, rest = part.partition('\n')
        tag = first_line.strip()
        if newline and re.fullmatch(r'[A-Za-z][\w+#.-]*', tag):
            lang, code = tag, rest
        else:
            lang, code = 'python', part  # Default to python

        code = code.strip('\n')
        rendered.append(f"```{lang}\n{code}\n```\n")

    return rendered


def format_conversation_history(
    messages: List[Dict[str, Any]],
    include_metadata: bool = True,
    include_code: bool = True,
    include_timestamps: bool = False
) -> str:
    """
    Format conversation history into structured markdown.

    Args:
        messages: List of conversation message dictionaries; each is read
            through its 'role' and 'content' keys. Only 'user' and
            'assistant' roles are rendered.
        include_metadata: Include metadata section
        include_code: Include fenced code blocks found in assistant
            responses; when False only the surrounding text is kept.
        include_timestamps: Include message timestamps.
            NOTE(review): accepted for interface compatibility but currently
            has no effect - no timestamp field is read from the messages.

    Returns:
        Formatted markdown string
    """
    # Header
    sections = ["# Biomni Analysis Report\n"]

    # Metadata
    if include_metadata:
        sections.append("## Metadata\n")
        sections.append(f"- **Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        sections.append(f"- **Number of interactions**: {len(messages)}")
        sections.append("\n---\n")

    # Process messages
    sections.append("## Analysis\n")

    # Number tasks by counting user messages, so the numbering stays
    # correct when the history does not strictly alternate user/assistant
    # (e.g. a leading system message).
    task_number = 0
    for msg in messages:
        role = msg.get('role', 'unknown')
        content = msg.get('content', '')

        if role == 'user':
            task_number += 1
            sections.append(f"### Task {task_number}\n")
            sections.append(f"**Query:**\n```\n{content}\n```\n")

        elif role == 'assistant':
            sections.append("**Response:**\n")

            if '```' in content:
                sections.extend(_render_fenced_content(content, include_code))
            else:
                sections.append(f"{content}\n")

            sections.append("\n---\n")

    return '\n'.join(sections)
|
||||
|
||||
|
||||
def markdown_to_html(markdown_content: str, title: str = "Biomni Report") -> str:
    """
    Convert markdown to a complete, styled HTML document.

    The body is produced by markdown_to_html_simple(); this function wraps
    it in a page template with embedded CSS and a footer.

    Args:
        markdown_content: Markdown string
        title: HTML page title (escaped before being placed in <title>)

    Returns:
        HTML string
    """
    import html

    # The title can come straight from the command line; escape it so that
    # characters such as '<' or '&' cannot break out of the <title> element.
    safe_title = html.escape(title)

    # Simple markdown to HTML conversion
    # For production use, consider using a library like markdown or mistune
    body_html = markdown_to_html_simple(markdown_content)

    html_template = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{safe_title}</title>
    <style>
        body {{
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
            line-height: 1.6;
            max-width: 900px;
            margin: 0 auto;
            padding: 20px;
            color: #333;
        }}
        h1 {{
            color: #2c3e50;
            border-bottom: 3px solid #3498db;
            padding-bottom: 10px;
        }}
        h2 {{
            color: #34495e;
            margin-top: 30px;
            border-bottom: 2px solid #95a5a6;
            padding-bottom: 5px;
        }}
        h3 {{
            color: #555;
        }}
        code {{
            background-color: #f4f4f4;
            padding: 2px 6px;
            border-radius: 3px;
            font-family: 'Monaco', 'Menlo', 'Courier New', monospace;
        }}
        pre {{
            background-color: #f8f8f8;
            border: 1px solid #ddd;
            border-radius: 5px;
            padding: 15px;
            overflow-x: auto;
        }}
        pre code {{
            background-color: transparent;
            padding: 0;
        }}
        hr {{
            border: none;
            border-top: 1px solid #ddd;
            margin: 30px 0;
        }}
        .metadata {{
            background-color: #ecf0f1;
            padding: 15px;
            border-radius: 5px;
            margin-bottom: 20px;
        }}
        .task {{
            background-color: #e8f4f8;
            padding: 10px;
            border-left: 4px solid #3498db;
            margin: 20px 0;
        }}
        .footer {{
            margin-top: 50px;
            text-align: center;
            color: #7f8c8d;
            font-size: 0.9em;
        }}
    </style>
</head>
<body>
    <div class="content">
        {body_html}
    </div>
    <div class="footer">
        <p>Generated with Biomni | Stanford SNAP Lab</p>
        <p><a href="https://github.com/snap-stanford/biomni">github.com/snap-stanford/biomni</a></p>
    </div>
</body>
</html>
"""
    return html_template
|
||||
|
||||
|
||||
def markdown_to_html_simple(md: str) -> str:
    """
    Simple markdown to HTML converter (basic implementation).

    Handles, line by line: fenced code blocks, '#'/'##'/'###' headings,
    '- ' bullet lists, '---' horizontal rules, '**bold**' spans, paragraphs,
    and blank lines (emitted as <br>). All text and code is HTML-escaped.

    Args:
        md: Markdown source text.

    Returns:
        HTML fragment (no <html>/<body> wrapper), lines joined by newlines.
    """
    import html
    import re

    def inline(text: str) -> str:
        # Escape first, then convert every **bold** pair on the line.
        escaped = html.escape(text, quote=False)
        return re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', escaped)

    html_lines = []
    code_lines = []
    code_lang = ''
    in_code_block = False
    in_list = False

    def close_list() -> None:
        nonlocal in_list
        if in_list:
            html_lines.append('</ul>')
            in_list = False

    def flush_code() -> None:
        # Emit the block as one element so no stray newline follows <code>,
        # and escape the code so '<', '>' and '&' render literally.
        attr = f' class="language-{html.escape(code_lang, quote=True)}"' if code_lang else ''
        body = html.escape('\n'.join(code_lines), quote=False)
        html_lines.append(f'<pre><code{attr}>{body}</code></pre>')

    for line in md.split('\n'):
        # Code blocks
        if line.startswith('```'):
            if in_code_block:
                flush_code()
                in_code_block = False
            else:
                close_list()
                code_lang = line[3:].strip()
                code_lines = []
                in_code_block = True
            continue

        if in_code_block:
            code_lines.append(line)
            continue

        # Lists
        if line.startswith('- '):
            if not in_list:
                html_lines.append('<ul>')
                in_list = True
            html_lines.append(f'<li>{inline(line[2:])}</li>')
            continue

        # Anything that is not a list item ends the current list, headings
        # included, so <ul> never wraps a heading.
        close_list()

        # Headers
        if line.startswith('# '):
            html_lines.append(f'<h1>{inline(line[2:])}</h1>')
        elif line.startswith('## '):
            html_lines.append(f'<h2>{inline(line[3:])}</h2>')
        elif line.startswith('### '):
            html_lines.append(f'<h3>{inline(line[4:])}</h3>')
        # Horizontal rule
        elif line.strip() == '---':
            html_lines.append('<hr>')
        # Paragraph (bold handled by inline())
        elif line.strip():
            html_lines.append(f'<p>{inline(line)}</p>')
        else:
            html_lines.append('<br>')

    # Close whatever is still open at end of input.
    if in_code_block:
        flush_code()
    close_list()

    return '\n'.join(html_lines)
|
||||
|
||||
|
||||
def generate_report(
    conversation_data: Dict[str, Any],
    output_path: Path,
    format: str = 'markdown',
    title: Optional[str] = None
):
    """
    Generate formatted report from conversation data.

    Args:
        conversation_data: Conversation history dictionary; the messages are
            read from its 'messages' key (missing key means no messages).
        output_path: Output file path (a str is accepted and converted).
        format: Output format ('markdown', 'html', or 'pdf'). 'pdf' is a
            placeholder: an HTML file is written next to output_path and
            conversion instructions are printed.
        title: Report title; defaults to "Biomni Analysis - <today>". Used
            as the page title of HTML output.

    Raises:
        ValueError: If format is not one of the supported values.
    """
    # Reject bad formats before doing any work or touching the filesystem.
    if format not in ('markdown', 'html', 'pdf'):
        raise ValueError(f"Unsupported format: {format}")

    output_path = Path(output_path)
    messages = conversation_data.get('messages', [])

    if not title:
        title = f"Biomni Analysis - {datetime.now().strftime('%Y-%m-%d')}"

    # Generate markdown
    markdown_content = format_conversation_history(messages)

    # All files are written as UTF-8 explicitly: the HTML declares that
    # charset, and the locale default is not UTF-8 on every platform.
    if format == 'markdown':
        output_path.write_text(markdown_content, encoding='utf-8')
        print(f"✓ Markdown report saved to {output_path}")

    elif format == 'html':
        html_content = markdown_to_html(markdown_content, title)
        output_path.write_text(html_content, encoding='utf-8')
        print(f"✓ HTML report saved to {output_path}")

    else:
        # For PDF generation, we'd typically use a library like weasyprint or reportlab
        # This is a placeholder implementation
        print("PDF generation requires additional dependencies (weasyprint or reportlab)")
        print("Falling back to HTML format...")

        html_path = output_path.with_suffix('.html')
        html_content = markdown_to_html(markdown_content, title)
        html_path.write_text(html_content, encoding='utf-8')

        print(f"✓ HTML report saved to {html_path}")
        print(" To convert to PDF:")
        print(" 1. Install weasyprint: pip install weasyprint")
        print(f" 2. Run: weasyprint {html_path} {output_path}")
|
||||
|
||||
|
||||
def main(argv=None):
    """
    Main entry point for CLI usage.

    Args:
        argv: Optional list of command-line arguments; when None, argparse
            reads sys.argv[1:].

    Returns:
        Process exit code: 0 on success, 1 on any reported error.
    """
    parser = argparse.ArgumentParser(
        description="Generate enhanced reports from biomni conversation histories"
    )

    parser.add_argument(
        '--input',
        type=Path,
        required=True,
        help='Input conversation history JSON file'
    )

    parser.add_argument(
        '--output',
        type=Path,
        required=True,
        help='Output report file path'
    )

    parser.add_argument(
        '--format',
        choices=['markdown', 'html', 'pdf'],
        default='markdown',
        help='Output format (default: markdown)'
    )

    parser.add_argument(
        '--title',
        type=str,
        help='Report title (optional)'
    )

    args = parser.parse_args(argv)

    # Load conversation data
    try:
        with open(args.input, 'r', encoding='utf-8') as f:
            conversation_data = json.load(f)
    except FileNotFoundError:
        print(f"❌ Input file not found: {args.input}")
        return 1
    except json.JSONDecodeError:
        print(f"❌ Invalid JSON in input file: {args.input}")
        return 1

    # generate_report() reads the history through dict access; give a clear
    # message for any other JSON top level instead of an AttributeError.
    if not isinstance(conversation_data, dict):
        print(f"❌ Input JSON must be an object with a 'messages' list: {args.input}")
        return 1

    # Generate report
    try:
        generate_report(
            conversation_data,
            args.output,
            format=args.format,
            title=args.title
        )
        return 0
    except Exception as e:
        # Top-level CLI boundary: report and convert to an exit code.
        print(f"❌ Error generating report: {e}")
        return 1
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script; main() returns the exit code.
if __name__ == '__main__':
    import sys
    sys.exit(main())
|
||||
|
||||
Reference in New Issue
Block a user