Initial commit for scientific-slides
This commit is contained in:
140
scripts/generate_slide_image.py
Normal file
140
scripts/generate_slide_image.py
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Slide image generation using Nano Banana Pro.
|
||||
|
||||
Generate presentation slides or visuals by describing them in natural language.
|
||||
Nano Banana Pro handles everything automatically with smart iterative refinement.
|
||||
|
||||
Two modes:
|
||||
- Default (full slide): Generate complete slides with title, content, visuals (for PDF workflow)
|
||||
- Visual only: Generate just images/figures to place on slides (for PPT workflow)
|
||||
|
||||
Supports attaching reference images for context (Nano Banana Pro will see these).
|
||||
|
||||
Usage:
|
||||
# Generate full slide for PDF workflow
|
||||
python generate_slide_image.py "Title: Introduction\\nKey points: AI, ML, Deep Learning" -o slide_01.png
|
||||
|
||||
# Generate visual only for PPT workflow
|
||||
python generate_slide_image.py "Neural network diagram" -o figure.png --visual-only
|
||||
|
||||
# With reference images attached
|
||||
python generate_slide_image.py "Create a slide about this data" -o slide.png --attach chart.png
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def main():
    """Command-line interface: validate inputs and delegate to the AI script.

    Parses the command line, resolves the OpenRouter API key (``--api-key``
    first, then the ``OPENROUTER_API_KEY`` environment variable), locates
    ``generate_slide_image_ai.py`` in the same directory as this file and
    runs it as a child process with the equivalent arguments.

    The function never returns normally; it always ends in ``sys.exit``:

    * with the child's return code once the child process has finished;
    * with 1 if no API key is available, the AI script is missing, or the
      child process cannot be started.
    """
    parser = argparse.ArgumentParser(
        description="Generate presentation slides or visuals using Nano Banana Pro AI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
How it works:
Describe your slide or visual in natural language.
Nano Banana Pro generates it automatically with:
- Smart iteration (only regenerates if quality is below threshold)
- Quality review by Gemini 3 Pro
- Publication-ready output

Modes:
Default (full slide): Generate complete slide with title, content, visuals
Use for PDF workflow where each slide is an image

Visual only: Generate just the image/figure
Use for PPT workflow where you add text separately

Attachments:
Use --attach to provide reference images that Nano Banana Pro will see.
This allows you to say "create a slide about this chart" and attach the chart.

Examples:
# Full slide (default) - for PDF workflow
python generate_slide_image.py "Title: Machine Learning\\nPoints: supervised, unsupervised, reinforcement" -o slide_01.png

# Visual only - for PPT workflow
python generate_slide_image.py "Flowchart showing data pipeline" -o figure.png --visual-only

# With reference images attached
python generate_slide_image.py "Create a slide explaining this chart" -o slide.png --attach chart.png
python generate_slide_image.py "Combine these into a comparison" -o compare.png --attach before.png --attach after.png

# Multiple slides for PDF
python generate_slide_image.py "Title slide: AI Conference 2025" -o slides/01_title.png
python generate_slide_image.py "Title: Introduction\\nOverview of deep learning" -o slides/02_intro.png

Environment Variables:
OPENROUTER_API_KEY Required for AI generation
""",
    )

    parser.add_argument("prompt", help="Description of the slide or visual to generate")
    parser.add_argument("-o", "--output", required=True, help="Output file path")
    parser.add_argument("--attach", action="append", dest="attachments", metavar="IMAGE",
                        help="Attach image file(s) as context (can use multiple times)")
    parser.add_argument("--visual-only", action="store_true",
                        help="Generate just the visual/figure (for PPT workflow)")
    parser.add_argument("--iterations", type=int, default=2,
                        help="Maximum refinement iterations (default: 2, max: 2)")
    parser.add_argument("--api-key", help="OpenRouter API key (or use OPENROUTER_API_KEY env var)")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")

    args = parser.parse_args()

    # Check for API key: explicit flag wins over the environment.
    api_key = args.api_key or os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        # Diagnostics go to stderr so stdout stays clean for callers that capture it.
        print("Error: OPENROUTER_API_KEY environment variable not set", file=sys.stderr)
        print("\nFor AI generation, you need an OpenRouter API key.", file=sys.stderr)
        print("Get one at: https://openrouter.ai/keys", file=sys.stderr)
        print("\nSet it with:", file=sys.stderr)
        print(" export OPENROUTER_API_KEY='your_api_key'", file=sys.stderr)
        print("\nOr use --api-key flag", file=sys.stderr)
        sys.exit(1)

    # Find AI generation script (expected next to this file).
    script_dir = Path(__file__).parent
    ai_script = script_dir / "generate_slide_image_ai.py"
    if not ai_script.exists():
        print(f"Error: AI generation script not found: {ai_script}", file=sys.stderr)
        sys.exit(1)

    # Build command
    cmd = [sys.executable, str(ai_script), args.prompt, "-o", args.output]

    for attachment in args.attachments or []:
        cmd.extend(["--attach", attachment])

    if args.visual_only:
        cmd.append("--visual-only")

    # Enforce 1..2 iterations. Values below 1 would make the child's
    # refinement loop a no-op, so they are raised to 1 instead of forwarded.
    iterations = max(1, min(args.iterations, 2))
    if iterations != 2:
        cmd.extend(["--iterations", str(iterations)])

    if args.verbose:
        cmd.append("-v")

    # Hand the key to the child through its environment rather than argv:
    # command-line arguments are visible to other users in process listings.
    # The generator reads OPENROUTER_API_KEY when no key is passed explicitly.
    child_env = dict(os.environ)
    child_env["OPENROUTER_API_KEY"] = api_key

    # Execute
    try:
        result = subprocess.run(cmd, check=False, env=child_env)
    except (OSError, ValueError) as e:
        # OSError: interpreter/script could not be launched.
        # ValueError: invalid argument such as an embedded NUL byte.
        print(f"Error executing AI generation: {e}", file=sys.stderr)
        sys.exit(1)

    sys.exit(result.returncode)


if __name__ == "__main__":
    main()
|
||||
763
scripts/generate_slide_image_ai.py
Normal file
763
scripts/generate_slide_image_ai.py
Normal file
@@ -0,0 +1,763 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
AI-powered slide image generation using Nano Banana Pro.
|
||||
|
||||
This script generates presentation slides or slide visuals using AI:
|
||||
- full_slide mode: Generate complete slides with title, content, and visuals (for PDF workflow)
|
||||
- visual_only mode: Generate just images/figures to place on slides (for PPT workflow)
|
||||
|
||||
Supports attaching reference images for context (e.g., "create a slide about this chart").
|
||||
|
||||
Uses smart iterative refinement:
|
||||
1. Generate initial image with Nano Banana Pro
|
||||
2. Quality review using Gemini 3 Pro
|
||||
3. Only regenerate if quality is below threshold
|
||||
4. Repeat until quality meets standards (max iterations)
|
||||
|
||||
Requirements:
|
||||
- OPENROUTER_API_KEY environment variable
|
||||
- requests library
|
||||
|
||||
Usage:
|
||||
# Full slide for PDF workflow
|
||||
python generate_slide_image_ai.py "Title: Introduction to ML\nKey points: supervised learning, neural networks" -o slide_01.png
|
||||
|
||||
# Visual only for PPT workflow
|
||||
python generate_slide_image_ai.py "Neural network architecture diagram" -o figure.png --visual-only
|
||||
|
||||
# With reference images attached
|
||||
python generate_slide_image_ai.py "Create a slide explaining this chart" -o slide.png --attach chart.png --attach logo.png
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any, List, Tuple
|
||||
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
print("Error: requests library not found. Install with: pip install requests")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _load_env_file():
|
||||
"""Load .env file from current directory, parent directories, or package directory."""
|
||||
try:
|
||||
from dotenv import load_dotenv
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
# Try current working directory first
|
||||
env_path = Path.cwd() / ".env"
|
||||
if env_path.exists():
|
||||
load_dotenv(dotenv_path=env_path, override=False)
|
||||
return True
|
||||
|
||||
# Try parent directories (up to 5 levels)
|
||||
cwd = Path.cwd()
|
||||
for _ in range(5):
|
||||
env_path = cwd / ".env"
|
||||
if env_path.exists():
|
||||
load_dotenv(dotenv_path=env_path, override=False)
|
||||
return True
|
||||
cwd = cwd.parent
|
||||
if cwd == cwd.parent:
|
||||
break
|
||||
|
||||
# Try the package's parent directory
|
||||
script_dir = Path(__file__).resolve().parent
|
||||
for _ in range(5):
|
||||
env_path = script_dir / ".env"
|
||||
if env_path.exists():
|
||||
load_dotenv(dotenv_path=env_path, override=False)
|
||||
return True
|
||||
script_dir = script_dir.parent
|
||||
if script_dir == script_dir.parent:
|
||||
break
|
||||
|
||||
return False
|
||||
|
||||
|
||||
class SlideImageGenerator:
    """Generate presentation slides or visuals using AI with iterative refinement.

    Two modes:
    - full_slide: Generate complete slide with title, content, visuals (for PDF workflow)
    - visual_only: Generate just the image/figure for a slide (for PPT workflow)

    Each generated image is sent to a review model; when the review asks for
    improvements the critique is folded into the next generation prompt, up
    to a maximum number of iterations.
    """

    # Quality threshold for presentations (lower than journal/conference papers)
    QUALITY_THRESHOLD = 6.5

    # Timeout, in seconds, applied to every OpenRouter HTTP request.
    REQUEST_TIMEOUT_SECONDS = 120

    # File suffix -> MIME type used when embedding images as data URLs.
    _MIME_TYPES = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    # Guidelines for generating full slides (complete slide images)
    FULL_SLIDE_GUIDELINES = """
Create a professional presentation slide image with these requirements:

SLIDE LAYOUT (16:9 aspect ratio):
- Clean, modern slide design
- Clear visual hierarchy: title at top, content below
- Generous margins (at least 5% on all sides)
- Balanced composition with intentional white space

TYPOGRAPHY:
- LARGE, bold title text (easily readable from distance)
- Clear, sans-serif fonts throughout
- High contrast text (dark on light or light on dark)
- Bullet points or key phrases, NOT paragraphs
- Maximum 5-6 lines of text content
- Default author/presenter: "K-Dense" (use this unless another name is specified)

VISUAL ELEMENTS:
- Use GENERIC, simple images and icons - avoid overly specific or detailed imagery
- MINIMAL extra elements - no decorative borders, shadows, or flourishes
- Visuals should support and enhance the message, not distract
- Professional, clean aesthetic with restraint
- Consistent color scheme (2-3 main colors only)
- Prefer abstract/conceptual visuals over literal representations

PROFESSIONAL MINIMALISM:
- Less is more: favor empty space over additional elements
- No unnecessary decorations, gradients, or visual noise
- Clean lines and simple shapes
- Focused content without visual clutter
- Corporate/academic level of professionalism

PRESENTATION QUALITY:
- Designed for projection (high contrast)
- Bold, impactful design that commands attention
- Professional and polished appearance
- No cluttered or busy layouts
- Consistent styling throughout the deck
"""

    # Guidelines for generating slide visuals only (figures/images for PPT)
    VISUAL_ONLY_GUIDELINES = """
Create a high-quality visual/figure for a presentation slide:

IMAGE QUALITY:
- Clean, professional appearance
- High resolution and sharp details
- Suitable for embedding in a slide

DESIGN:
- Simple, clear composition with MINIMAL elements
- High contrast for projection readability
- No text unless essential to the visual
- Transparent or white background preferred
- GENERIC imagery - avoid overly specific or detailed visuals

PROFESSIONAL MINIMALISM:
- Favor simplicity over complexity
- No decorative elements, shadows, or flourishes
- Clean lines and simple shapes only
- Remove any unnecessary visual noise
- Abstract/conceptual rather than literal representations

STYLE:
- Modern, professional aesthetic
- Colorblind-friendly colors
- Bold but restrained imagery
- Suitable for scientific/professional presentations
- Corporate/academic level of polish
"""

    def __init__(self, api_key: Optional[str] = None, verbose: bool = False):
        """
        Initialize the generator.

        The key is taken from ``api_key``, then from the OPENROUTER_API_KEY
        environment variable, and finally from a ``.env`` file located via
        ``_load_env_file``.

        Args:
            api_key: OpenRouter API key (or use OPENROUTER_API_KEY env var)
            verbose: Print detailed progress information

        Raises:
            ValueError: If no API key can be found.
        """
        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")

        if not self.api_key:
            _load_env_file()
            self.api_key = os.getenv("OPENROUTER_API_KEY")

        if not self.api_key:
            raise ValueError(
                "OPENROUTER_API_KEY not found. Please either:\n"
                " 1. Set the OPENROUTER_API_KEY environment variable\n"
                " 2. Add OPENROUTER_API_KEY to your .env file\n"
                " 3. Pass api_key parameter to the constructor\n"
                "Get your API key from: https://openrouter.ai/keys"
            )

        self.verbose = verbose
        # Human-readable reason for the most recent generate_image() failure.
        self._last_error = None
        self.base_url = "https://openrouter.ai/api/v1"
        # Nano Banana Pro for image generation
        self.image_model = "google/gemini-3-pro-image-preview"
        # Gemini 3 Pro for quality review
        self.review_model = "google/gemini-3-pro"

    def _log(self, message: str):
        """Log message if verbose mode is enabled."""
        if self.verbose:
            print(f"[{time.strftime('%H:%M:%S')}] {message}")

    def _make_request(self, model: str, messages: List[Dict[str, Any]],
                      modalities: Optional[List[str]] = None) -> Dict[str, Any]:
        """Make a request to OpenRouter API.

        Args:
            model: Model identifier to send in the payload.
            messages: Chat messages to send.
            modalities: Optional output modalities; omitted from the payload
                when empty or None.

        Returns:
            The decoded JSON body of a 200 response. A body that is not a
            JSON object is wrapped as ``{"raw_text": <first 500 chars>}``.

        Raises:
            RuntimeError: On timeout, transport failure, or a non-200 status.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://github.com/scientific-writer",
            "X-Title": "Scientific Slide Generator"
        }

        payload = {
            "model": model,
            "messages": messages
        }
        if modalities:
            payload["modalities"] = modalities

        self._log(f"Making request to {model}...")

        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=self.REQUEST_TIMEOUT_SECONDS
            )
        except requests.exceptions.Timeout as e:
            raise RuntimeError(
                f"API request timed out after {self.REQUEST_TIMEOUT_SECONDS} seconds"
            ) from e
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"API request failed: {str(e)}") from e

        try:
            response_json = response.json()
        except ValueError:
            # Every JSON decode error raised by requests (stdlib json or
            # simplejson backed) derives from ValueError.
            response_json = None
        if not isinstance(response_json, dict):
            # Keep a short excerpt so error messages stay informative.
            response_json = {"raw_text": response.text[:500]}

        if response.status_code != 200:
            error_detail = response_json.get("error", response_json)
            self._log(f"HTTP {response.status_code}: {error_detail}")
            raise RuntimeError(f"API request failed (HTTP {response.status_code}): {error_detail}")

        return response_json

    @staticmethod
    def _image_url_of(block: Dict[str, Any]) -> Any:
        """Return the URL of an ``image_url`` block, whether nested in a dict or given directly."""
        url = block.get("image_url", {})
        if isinstance(url, dict):
            url = url.get("url", "")
        return url

    @staticmethod
    def _decode_data_url(url: Any) -> Optional[bytes]:
        """Decode the base64 payload of a ``data:image...`` URL; None if *url* is not one."""
        if not isinstance(url, str) or not url.startswith("data:image") or "," not in url:
            return None
        encoded = url.split(",", 1)[1]
        # Strip line breaks/spaces that may be embedded in the payload.
        return base64.b64decode("".join(encoded.split()))

    def _extract_image_from_response(self, response: Dict[str, Any]) -> Optional[bytes]:
        """Extract base64-encoded image from API response.

        Looks, in order, at the message's ``images`` field, at a
        ``data:image`` URL embedded in string content, and at ``image_url``
        blocks in list content.

        Returns:
            The decoded image bytes, or None when no image can be extracted
            (including malformed responses).
        """
        import re

        try:
            choices = response.get("choices", [])
            if not choices:
                self._log("No choices in response")
                return None

            message = choices[0].get("message", {})

            # Nano Banana Pro returns images in the 'images' field
            images = message.get("images") or []
            if images:
                self._log(f"Found {len(images)} image(s) in 'images' field")
            for image in images:
                if isinstance(image, dict) and image.get("type") == "image_url":
                    image_bytes = self._decode_data_url(self._image_url_of(image))
                    if image_bytes is not None:
                        self._log(f"Extracted image data ({len(image_bytes)} bytes)")
                        return image_bytes

            # Fallback: check content field
            content = message.get("content", "")

            if isinstance(content, str) and "data:image" in content:
                match = re.search(r'data:image/[^;]+;base64,([A-Za-z0-9+/=\n\r]+)', content, re.DOTALL)
                if match:
                    base64_str = "".join(match.group(1).split())
                    self._log(f"Found image in content field (length: {len(base64_str)})")
                    return base64.b64decode(base64_str)

            if isinstance(content, list):
                for i, block in enumerate(content):
                    if isinstance(block, dict) and block.get("type") == "image_url":
                        image_bytes = self._decode_data_url(self._image_url_of(block))
                        if image_bytes is not None:
                            self._log(f"Found image in content block {i}")
                            return image_bytes

            self._log("No image data found in response")
            return None

        except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
            # Malformed response shape or invalid base64 (binascii.Error is a
            # ValueError): treat as "no image" rather than crashing.
            self._log(f"Error extracting image: {str(e)}")
            return None

    def _image_to_base64(self, image_path: str) -> str:
        """Convert image file to base64 data URL.

        The MIME type is chosen from the file suffix; unknown suffixes are
        labelled ``image/png``.

        Raises:
            OSError: If the file cannot be read.
        """
        path = Path(image_path)
        mime_type = self._MIME_TYPES.get(path.suffix.lower(), "image/png")
        base64_data = base64.b64encode(path.read_bytes()).decode("utf-8")
        return f"data:{mime_type};base64,{base64_data}"

    def generate_image(self, prompt: str, attachments: Optional[List[str]] = None) -> Optional[bytes]:
        """
        Generate an image using Nano Banana Pro.

        Attachments that cannot be read are skipped with a warning on stderr.
        On failure the reason is stored in ``self._last_error``.

        Args:
            prompt: Text description of the image to generate
            attachments: Optional list of image file paths to attach as context

        Returns:
            Image bytes or None if generation failed
        """
        self._last_error = None

        # Build content with text and optional image attachments
        content = [{"type": "text", "text": prompt}]
        for img_path in attachments or []:
            try:
                img_data_url = self._image_to_base64(img_path)
            except (OSError, TypeError, ValueError) as e:
                # Best effort: carry on without this attachment, but tell the
                # user even when not verbose, since the result will ignore it.
                print(f"Warning: Could not attach {img_path}: {e}", file=sys.stderr)
                continue
            content.append({
                "type": "image_url",
                "image_url": {"url": img_data_url}
            })
            self._log(f"Attached image: {img_path}")

        messages = [
            {
                "role": "user",
                "content": content if attachments else prompt
            }
        ]

        try:
            response = self._make_request(
                model=self.image_model,
                messages=messages,
                modalities=["image", "text"]
            )
            self._log(f"Response keys: {response.keys()}")

            if "error" in response:
                error_msg = response["error"]
                self._log(f"API Error: {error_msg}")
                if isinstance(error_msg, dict):
                    error_msg = error_msg.get("message", str(error_msg))
                self._last_error = f"API Error: {error_msg}"
                print(f"✗ {self._last_error}")
                return None

            image_data = self._extract_image_from_response(response)
            if image_data:
                self._log(f"✓ Generated image ({len(image_data)} bytes)")
            else:
                self._last_error = "No image data in API response"
                self._log(f"✗ {self._last_error}")

            return image_data
        except RuntimeError as e:
            self._last_error = str(e)
            self._log(f"✗ Generation failed: {self._last_error}")
            return None
        except Exception as e:
            # Boundary handler: callers rely on None + _last_error, not exceptions.
            self._last_error = f"Unexpected error: {str(e)}"
            self._log(f"✗ Generation failed: {self._last_error}")
            return None

    @staticmethod
    def _parse_review(content: str, threshold: float) -> Tuple[float, bool]:
        """Parse a review text into ``(score, needs_improvement)``.

        The score defaults to 7.0 when none can be found and is clamped to
        the 0-10 scale. Improvement is needed when the ``VERDICT:`` line says
        so (or, without such a line, when the text mentions
        NEEDS_IMPROVEMENT), or when the score is below *threshold*.
        """
        import re

        number = r'(\d+(?:\.\d+)?)'
        score_match = (
            re.search(r'SCORE:\s*' + number, content, re.IGNORECASE)
            or re.search(r'(?:score|rating|quality)[:\s]+' + number, content, re.IGNORECASE)
        )
        score = 7.0
        if score_match:
            score = min(10.0, max(0.0, float(score_match.group(1))))

        # Prefer the explicit verdict line requested by the review prompt; a
        # bare substring test would also fire on e.g. "not NEEDS_IMPROVEMENT".
        verdict = re.search(
            r'VERDICT:\s*\**\s*\[?\s*(ACCEPTABLE|NEEDS[_ ]IMPROVEMENT)',
            content, re.IGNORECASE
        )
        if verdict:
            flagged = verdict.group(1).upper() != "ACCEPTABLE"
        else:
            flagged = "NEEDS_IMPROVEMENT" in content.upper()

        return score, flagged or score < threshold

    def review_image(self, image_path: str, original_prompt: str,
                     iteration: int, visual_only: bool = False,
                     max_iterations: int = 2) -> Tuple[str, float, bool]:
        """Review generated image using Gemini 3 Pro.

        Args:
            image_path: Path of the image file to review.
            original_prompt: The user's request, quoted to the reviewer.
            iteration: Current iteration number (shown to the reviewer).
            visual_only: Whether the image is a stand-alone visual.
            max_iterations: Total iterations (shown to the reviewer).

        Returns:
            ``(critique, score, needs_improvement)``. If the review cannot be
            performed for any reason it is skipped and
            ``(..., 7.0, False)`` is returned so generation is not blocked.
        """
        threshold = self.QUALITY_THRESHOLD

        image_type = "slide visual/figure" if visual_only else "presentation slide"

        review_prompt = f"""You are an expert reviewer evaluating a {image_type} for presentation quality.

ORIGINAL REQUEST: {original_prompt}

QUALITY THRESHOLD: {threshold}/10
ITERATION: {iteration}/{max_iterations}

Evaluate this {image_type} on these criteria:

1. **Visual Impact** (0-2 points)
- Bold, attention-grabbing design
- Professional appearance
- Suitable for projection

2. **Clarity** (0-2 points)
- Easy to understand at a glance
- Clear visual hierarchy
- Not cluttered or busy

3. **Readability** (0-2 points)
- Text is large and readable (if present)
- High contrast
- Clean typography

4. **Composition** (0-2 points)
- Balanced layout
- Good use of space
- Appropriate margins

5. **Relevance** (0-2 points)
- Matches the requested content
- Appropriate style for presentations
- Professional quality

RESPOND IN THIS EXACT FORMAT:
SCORE: [total score 0-10]

STRENGTHS:
- [strength 1]
- [strength 2]

ISSUES:
- [issue 1 if any]
- [issue 2 if any]

VERDICT: [ACCEPTABLE or NEEDS_IMPROVEMENT]

If score >= {threshold}, the image is ACCEPTABLE.
If score < {threshold}, mark as NEEDS_IMPROVEMENT with specific suggestions."""

        try:
            # Reading the image is part of the best-effort review: a read
            # failure skips the review instead of aborting generation.
            image_data_url = self._image_to_base64(image_path)

            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": review_prompt},
                        {"type": "image_url", "image_url": {"url": image_data_url}}
                    ]
                }
            ]

            response = self._make_request(model=self.review_model, messages=messages)

            choices = response.get("choices", [])
            if not choices:
                return "Image generated successfully", 7.0, False

            message = choices[0].get("message", {})
            content = message.get("content", "")

            # Some responses carry the text in 'reasoning' with empty content.
            reasoning = message.get("reasoning", "")
            if reasoning and not content:
                content = reasoning

            if isinstance(content, list):
                content = "\n".join(
                    block.get("text", "")
                    for block in content
                    if isinstance(block, dict) and block.get("type") == "text"
                )
            if not isinstance(content, str):
                # e.g. "content": null with no reasoning
                content = ""

            score, needs_improvement = self._parse_review(content, threshold)

            self._log(f"✓ Review complete (Score: {score}/10, Threshold: {threshold}/10)")

            return (content if content else "Image generated successfully", score, needs_improvement)
        except Exception as e:
            # Deliberate best effort: a failed review must not lose the image.
            self._log(f"Review skipped: {str(e)}")
            return "Image generated successfully (review skipped)", 7.0, False

    def improve_prompt(self, original_prompt: str, critique: str,
                       iteration: int, visual_only: bool = False) -> str:
        """Improve the generation prompt based on critique.

        Returns the mode's guidelines followed by the user's request and the
        reviewer's critique for the given iteration.
        """
        guidelines = self.VISUAL_ONLY_GUIDELINES if visual_only else self.FULL_SLIDE_GUIDELINES

        return f"""{guidelines}

USER REQUEST: {original_prompt}

ITERATION {iteration}: Based on previous feedback, address these specific improvements:
{critique}

Generate an improved version that addresses all the critique points."""

    def generate_slide(self, user_prompt: str, output_path: str,
                       visual_only: bool = False,
                       iterations: int = 2,
                       attachments: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Generate a slide image or visual with iterative refinement.

        Each iteration generates an image, has it reviewed, and stops as soon
        as the review is satisfied or the iteration limit is reached. If a
        later iteration fails to generate anything, the most recent image
        that was generated and reviewed is kept instead of being discarded.

        Args:
            user_prompt: Description of the slide/visual to generate
            output_path: Path to save final image
            visual_only: If True, generate just the visual (for PPT workflow)
            iterations: Maximum refinement iterations (default: 2); values
                below 1 are treated as 1
            attachments: Optional list of image file paths to attach as context

        Returns:
            Dictionary with generation results and metadata
        """
        import tempfile

        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        extension = output_path.suffix or ".png"

        # A limit below 1 would skip generation entirely.
        iterations = max(1, iterations)

        mode = "visual_only" if visual_only else "full_slide"
        guidelines = self.VISUAL_ONLY_GUIDELINES if visual_only else self.FULL_SLIDE_GUIDELINES
        subject = 'visual/figure' if visual_only else 'presentation slide'

        results = {
            "user_prompt": user_prompt,
            "mode": mode,
            "quality_threshold": self.QUALITY_THRESHOLD,
            "attachments": attachments or [],
            "iterations": [],
            "final_image": None,
            "final_score": 0.0,
            "success": False,
            "early_stop": False
        }

        current_prompt = f"""{guidelines}

USER REQUEST: {user_prompt}

Generate a high-quality {subject} that meets all the guidelines above."""

        print(f"\n{'='*60}")
        print(f"Generating Slide {'Visual' if visual_only else 'Image'}")
        print(f"{'='*60}")
        print(f"Description: {user_prompt[:100]}{'...' if len(user_prompt) > 100 else ''}")
        print(f"Mode: {mode}")
        if attachments:
            print(f"Attachments: {len(attachments)} image(s)")
            for att in attachments:
                print(f" - {att}")
        print(f"Quality Threshold: {self.QUALITY_THRESHOLD}/10")
        print(f"Max Iterations: {iterations}")
        print(f"Output: {output_path}")
        print(f"{'='*60}\n")

        # Track temporary files for cleanup
        temp_files = []
        final_image_data = None
        # (image bytes, score) of the most recent image that was reviewed;
        # used when later iterations fail to produce anything.
        last_reviewed = None

        try:
            for i in range(1, iterations + 1):
                print(f"\n[Iteration {i}/{iterations}]")
                print("-" * 40)

                print("Generating image with Nano Banana Pro...")
                image_data = self.generate_image(current_prompt, attachments=attachments)

                if not image_data:
                    error_msg = self._last_error or 'Image generation failed'
                    print(f"✗ Generation failed: {error_msg}")
                    results["iterations"].append({
                        "iteration": i,
                        "success": False,
                        "error": error_msg
                    })
                    continue

                # Save to temporary file for review (will be cleaned up)
                temp_fd, temp_name = tempfile.mkstemp(suffix=extension)
                temp_path = Path(temp_name)
                temp_files.append(temp_path)
                with os.fdopen(temp_fd, "wb") as f:
                    f.write(image_data)
                print(f"✓ Generated image (iteration {i})")

                print("Reviewing image with Gemini 3 Pro...")
                critique, score, needs_improvement = self.review_image(
                    str(temp_path), user_prompt, i, visual_only, iterations
                )
                print(f"✓ Score: {score}/10 (threshold: {self.QUALITY_THRESHOLD}/10)")

                results["iterations"].append({
                    "iteration": i,
                    "critique": critique,
                    "score": score,
                    "needs_improvement": needs_improvement,
                    "success": True
                })
                last_reviewed = (image_data, score)

                if not needs_improvement:
                    print(f"\n✓ Quality meets threshold ({score} >= {self.QUALITY_THRESHOLD})")
                    final_image_data = image_data
                    results["final_score"] = score
                    results["success"] = True
                    results["early_stop"] = True
                    break

                if i == iterations:
                    print("\n⚠ Maximum iterations reached")
                    final_image_data = image_data
                    results["final_score"] = score
                    results["success"] = True
                    break

                if score < self.QUALITY_THRESHOLD:
                    print(f"\n⚠ Quality below threshold ({score} < {self.QUALITY_THRESHOLD})")
                else:
                    # The verdict can request changes even with a passing score.
                    print(f"\n⚠ Reviewer requested improvements (score {score})")
                print("Improving prompt...")
                current_prompt = self.improve_prompt(user_prompt, critique, i + 1, visual_only)
        finally:
            # Clean up temporary files, even if an iteration raised.
            for temp_file in temp_files:
                try:
                    temp_file.unlink()
                except OSError:
                    pass  # best effort: a leftover temp file is harmless

        if final_image_data is None and last_reviewed is not None:
            # Later attempts produced nothing; keep the image we already have
            # rather than reporting total failure.
            print("\n⚠ Later generation attempts failed; keeping the last generated image")
            final_image_data, results["final_score"] = last_reviewed
            results["success"] = True

        # Save only the final image to output path
        if results["success"] and final_image_data:
            output_path.write_bytes(final_image_data)
            results["final_image"] = str(output_path)
            print(f"\n✓ Final image: {output_path}")

        print(f"\n{'='*60}")
        if results["success"]:
            print("Generation Complete!")
            print(f"Final Score: {results['final_score']}/10")
        else:
            print("Generation Failed")
        if results["early_stop"]:
            success_count = len([r for r in results['iterations'] if r.get('success')])
            print(f"Iterations Used: {success_count}/{iterations} (early stop)")
        print(f"{'='*60}\n")

        return results
|
||||
|
||||
|
||||
def main():
    """Run the slide/visual generator from the command line.

    Parses the arguments, validates the API key, the iteration count and any
    attachment paths, then delegates to ``SlideImageGenerator.generate_slide``.
    The process exits with status 0 when the returned results report success
    and with status 1 on any validation or generation failure.
    """
    usage_examples = """
Examples:
# Generate a full slide (for PDF workflow)
python generate_slide_image_ai.py "Title: Machine Learning Basics\\nKey points: supervised learning, neural networks, deep learning" -o slide_01.png

# Generate just a visual/figure (for PPT workflow)
python generate_slide_image_ai.py "Neural network architecture diagram with input, hidden, and output layers" -o figure.png --visual-only

# With reference images attached (Nano Banana Pro will see these)
python generate_slide_image_ai.py "Create a slide explaining this chart with key insights" -o slide.png --attach chart.png
python generate_slide_image_ai.py "Combine these images into a comparison slide" -o compare.png --attach before.png --attach after.png

# With custom iterations
python generate_slide_image_ai.py "Title slide for AI Conference 2025" -o title.png --iterations 2

# Verbose output
python generate_slide_image_ai.py "Data flow diagram" -o flow.png -v

Environment:
OPENROUTER_API_KEY OpenRouter API key (required)
"""

    cli = argparse.ArgumentParser(
        description="Generate presentation slides or visuals using Nano Banana Pro AI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument("prompt", help="Description of the slide or visual to generate")
    cli.add_argument("-o", "--output", required=True, help="Output image path")
    cli.add_argument(
        "--attach",
        action="append",
        dest="attachments",
        metavar="IMAGE",
        help="Attach image file(s) as context for generation (can use multiple times)",
    )
    cli.add_argument(
        "--visual-only",
        action="store_true",
        help="Generate just the visual/figure (for PPT workflow)",
    )
    cli.add_argument(
        "--iterations",
        type=int,
        default=2,
        help="Maximum refinement iterations (default: 2)",
    )
    cli.add_argument("--api-key", help="OpenRouter API key (or set OPENROUTER_API_KEY)")
    cli.add_argument("-v", "--verbose", action="store_true", help="Verbose output")

    opts = cli.parse_args()

    # An explicit --api-key takes precedence over the environment variable.
    key = opts.api_key or os.getenv("OPENROUTER_API_KEY")
    if not key:
        for line in (
            "Error: OPENROUTER_API_KEY environment variable not set",
            "\nSet it with:",
            " export OPENROUTER_API_KEY='your_api_key'",
        ):
            print(line)
        sys.exit(1)

    if not 1 <= opts.iterations <= 2:
        print("Error: Iterations must be between 1 and 2")
        sys.exit(1)

    # Report the first attachment that does not exist, if any.
    missing = next(
        (item for item in (opts.attachments or []) if not Path(item).exists()),
        None,
    )
    if missing is not None:
        print(f"Error: Attachment file not found: {missing}")
        sys.exit(1)

    try:
        generator = SlideImageGenerator(api_key=key, verbose=opts.verbose)
        outcome = generator.generate_slide(
            user_prompt=opts.prompt,
            output_path=opts.output,
            visual_only=opts.visual_only,
            iterations=opts.iterations,
            attachments=opts.attachments,
        )

        if not outcome["success"]:
            print("\n✗ Generation failed. Check review log for details.")
            sys.exit(1)

        print(f"\n✓ Success! Image saved to: {opts.output}")
        sys.exit(0)
    except Exception as exc:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exits above pass through this handler untouched.
        print(f"\n✗ Error: {exc}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
221
scripts/pdf_to_images.py
Normal file
221
scripts/pdf_to_images.py
Normal file
@@ -0,0 +1,221 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
PDF to Images Converter for Presentations
|
||||
|
||||
Converts presentation PDFs to images for visual inspection and review.
|
||||
Supports multiple output formats and resolutions.
|
||||
|
||||
Uses PyMuPDF (fitz) as the primary conversion method - no external
|
||||
dependencies required (no poppler, ghostscript, or ImageMagick needed).
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Optional, List
|
||||
|
||||
# Try to import pymupdf (preferred - no external dependencies)
|
||||
try:
|
||||
import fitz # PyMuPDF
|
||||
HAS_PYMUPDF = True
|
||||
except ImportError:
|
||||
HAS_PYMUPDF = False
|
||||
|
||||
|
||||
class PDFToImagesConverter:
    """Render the pages of a presentation PDF to individual image files.

    Each selected page is written as ``<output_prefix>-NNN.<format>`` where
    ``NNN`` is the 1-based page number, zero-padded to three digits.
    """

    def __init__(
        self,
        pdf_path: str,
        output_prefix: str,
        dpi: int = 150,
        format: str = 'jpg',
        first_page: Optional[int] = None,
        last_page: Optional[int] = None
    ):
        """Store the conversion settings.

        Args:
            pdf_path: Path of the PDF to convert.
            output_prefix: Filename prefix (may include a directory part).
            dpi: Rendering resolution; 72 DPI corresponds to a zoom of 1.0.
            format: Output format, one of ``jpg``, ``jpeg`` or ``png``
                (compared case-insensitively).
            first_page: First page to convert (1-indexed); falsy means "from
                the first page".
            last_page: Last page to convert (1-indexed, inclusive); falsy
                means "through the last page".

        Raises:
            ValueError: If ``format`` is not a supported format.
        """
        self.pdf_path = Path(pdf_path)
        self.output_prefix = output_prefix
        self.dpi = dpi
        self.format = format.lower()
        self.first_page = first_page
        self.last_page = last_page

        # Validate format
        if self.format not in ['jpg', 'jpeg', 'png']:
            raise ValueError(f"Unsupported format: {format}. Use jpg or png.")

    def convert(self) -> List[Path]:
        """Convert PDF to images using PyMuPDF.

        Returns:
            Paths of the image files that were written, in page order.

        Raises:
            FileNotFoundError: If the PDF does not exist.
            RuntimeError: If PyMuPDF is not installed.
        """
        if not self.pdf_path.exists():
            raise FileNotFoundError(f"PDF not found: {self.pdf_path}")

        print(f"Converting: {self.pdf_path.name}")
        print(f"Output prefix: {self.output_prefix}")
        print(f"DPI: {self.dpi}")
        print(f"Format: {self.format}")

        if HAS_PYMUPDF:
            return self._convert_with_pymupdf()
        else:
            raise RuntimeError(
                "PyMuPDF not installed. Install it with:\n"
                " pip install pymupdf\n\n"
                "PyMuPDF is a self-contained library - no external dependencies needed."
            )

    def _page_range(self, page_count: int) -> range:
        """Return the 0-based page indices to render, clamped to the document.

        A ``last_page`` beyond the end of the document is truncated to the
        final page and a ``first_page`` below 1 starts at the first page, so
        the caller never indexes outside ``[0, page_count)``.
        """
        start = (self.first_page - 1) if self.first_page else 0
        stop = self.last_page if self.last_page else page_count
        return range(max(start, 0), min(stop, page_count))

    def _convert_with_pymupdf(self) -> List[Path]:
        """Convert using PyMuPDF library (no external dependencies)."""
        print("Using PyMuPDF (no external dependencies required)...")

        # Open the PDF
        doc = fitz.open(self.pdf_path)
        try:
            # Calculate zoom factor from DPI (72 DPI is the base)
            zoom = self.dpi / 72
            matrix = fitz.Matrix(zoom, zoom)

            output_files = []
            output_dir = Path(self.output_prefix).parent
            output_dir.mkdir(parents=True, exist_ok=True)

            # Both jpg and jpeg are written with the "jpeg" encoder name.
            encoder = "jpeg" if self.format in ['jpg', 'jpeg'] else "png"

            for page_num in self._page_range(doc.page_count):
                # Render page to pixmap
                pixmap = doc[page_num].get_pixmap(matrix=matrix)

                # File names use the 1-based page number.
                output_path = Path(f"{self.output_prefix}-{page_num + 1:03d}.{self.format}")
                pixmap.save(str(output_path), output=encoder)

                output_files.append(output_path)
                print(f" Created: {output_path.name}")

            return output_files
        finally:
            # Release the document even when rendering or saving fails.
            doc.close()
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: convert a presentation PDF to images.

    Exits with status 0 after a successful conversion and with status 1
    (message on stderr) when the converter raises.
    """
    help_footer = """
Examples:
%(prog)s presentation.pdf slides
→ Creates slides-001.jpg, slides-002.jpg, ...

%(prog)s presentation.pdf output/slide --dpi 300 --format png
→ Creates output/slide-001.png, slide-002.png, ... at high resolution

%(prog)s presentation.pdf review/s --first 5 --last 10
→ Converts only slides 5-10

Output:
Images are named: PREFIX-001.FORMAT, PREFIX-002.FORMAT, etc.

Resolution:
- 150 DPI: Good for screen review (default)
- 200 DPI: Higher quality for detailed inspection
- 300 DPI: Print quality (larger files)

Requirements:
Install PyMuPDF (no external dependencies needed):
pip install pymupdf
"""

    cli = argparse.ArgumentParser(
        description='Convert presentation PDFs to images',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=help_footer,
    )
    cli.add_argument('pdf_path', help='Path to PDF presentation')
    cli.add_argument(
        'output_prefix',
        help='Output filename prefix (e.g., "slides" or "output/slide")',
    )
    cli.add_argument('--dpi', '-r', type=int, default=150,
                     help='Resolution in DPI (default: 150)')
    cli.add_argument('--format', '-f', choices=['jpg', 'jpeg', 'png'], default='jpg',
                     help='Output format (default: jpg)')
    cli.add_argument('--first', type=int, help='First page to convert (1-indexed)')
    cli.add_argument('--last', type=int, help='Last page to convert (1-indexed)')

    opts = cli.parse_args()

    # Only create a directory when the prefix actually names one.
    target_dir = Path(opts.output_prefix).parent
    if target_dir != Path('.'):
        target_dir.mkdir(parents=True, exist_ok=True)

    try:
        created = PDFToImagesConverter(
            pdf_path=opts.pdf_path,
            output_prefix=opts.output_prefix,
            dpi=opts.dpi,
            format=opts.format,
            first_page=opts.first,
            last_page=opts.last,
        ).convert()

        banner = "=" * 60
        print()
        print(banner)
        print(f"✅ Success! Created {len(created)} image(s)")
        print(banner)

        if created:
            print(f"\nFirst image: {created[0]}")
            print(f"Last image: {created[-1]}")

            # Report the combined on-disk size in mebibytes.
            total_bytes = sum(item.stat().st_size for item in created)
            print(f"Total size: {total_bytes / (1024 * 1024):.2f} MB")

        for line in (
            "\nNext steps:",
            " 1. Review images for layout issues",
            " 2. Check for text overflow or element overlap",
            " 3. Verify readability from distance",
            " 4. Document issues with slide numbers",
        ):
            print(line)

        sys.exit(0)

    except Exception as exc:
        print(f"\n❌ Error: {exc}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
235
scripts/slides_to_pdf.py
Normal file
235
scripts/slides_to_pdf.py
Normal file
@@ -0,0 +1,235 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Combine slide images into a single PDF presentation.
|
||||
|
||||
This script takes multiple slide images (PNG, JPG) and combines them
|
||||
into a single PDF file, maintaining aspect ratio and quality.
|
||||
|
||||
Usage:
|
||||
# Combine all PNG files in a directory
|
||||
python slides_to_pdf.py slides/*.png -o presentation.pdf
|
||||
|
||||
# Combine specific files in order
|
||||
python slides_to_pdf.py slide_01.png slide_02.png slide_03.png -o presentation.pdf
|
||||
|
||||
# From a directory (sorted by filename)
|
||||
python slides_to_pdf.py slides/ -o presentation.pdf
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
try:
|
||||
from PIL import Image
|
||||
except ImportError:
|
||||
print("Error: Pillow library not found. Install with: pip install Pillow")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def get_image_files(paths: List[str]) -> List[Path]:
    """
    Get list of image files from paths (files or directories).

    Arguments are honoured in the order given, so explicitly listed files
    keep their command-line order. A directory or an unexpanded glob pattern
    contributes its matching images sorted by filename. A file reached more
    than once is kept only at its first position.

    Args:
        paths: List of file paths, directory paths, or glob patterns

    Returns:
        De-duplicated list of image file paths in presentation order
    """
    image_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp'}

    def is_image(candidate: Path) -> bool:
        # Compare suffixes case-insensitively so ".PNG" and ".Png" both match.
        return candidate.is_file() and candidate.suffix.lower() in image_extensions

    def by_name(candidates) -> List[Path]:
        # The full path breaks ties so the order is deterministic.
        return sorted(
            (c for c in candidates if is_image(c)),
            key=lambda c: (c.name, str(c)),
        )

    ordered: List[Path] = []

    for path_str in paths:
        path = Path(path_str)

        if path.is_file():
            if path.suffix.lower() in image_extensions:
                ordered.append(path)
            else:
                print(f"Warning: Skipping non-image file: {path}")
        elif path.is_dir():
            # Get all images in directory
            ordered.extend(by_name(path.iterdir()))
        else:
            # Not an existing path: treat the last component as a glob
            # pattern (e.g. when the shell did not expand it).
            parent = path.parent
            if parent.exists():
                ordered.extend(by_name(parent.glob(path.name)))

    # dict preserves insertion order, so this removes duplicates while
    # keeping each file at the position where it first appeared.
    return list(dict.fromkeys(ordered))
|
||||
|
||||
|
||||
def _load_rgb_image(img_path: Path):
    """Open *img_path* and return an independent RGB copy of it.

    Images with an alpha channel or a palette are composited onto a white
    background, because the PDF writer cannot store transparency. The
    underlying file handle is closed before returning.
    """
    with Image.open(img_path) as source:
        if source.mode == 'RGB':
            # copy() loads the pixel data so the result outlives the handle.
            return source.copy()
        if source.mode in ('RGBA', 'LA', 'P'):
            rgba = source.convert('RGBA')
            background = Image.new('RGB', rgba.size, (255, 255, 255))
            # The alpha band is the paste mask, so transparent areas stay white.
            background.paste(rgba, mask=rgba.split()[-1])
            return background
        return source.convert('RGB')


def combine_images_to_pdf(image_paths: List[Path], output_path: Path,
                          dpi: int = 150, verbose: bool = False) -> bool:
    """
    Combine multiple images into a single PDF.

    Every image becomes one page, in the order given. All opened images are
    closed before returning, whether or not the PDF could be written.

    Args:
        image_paths: List of image file paths
        output_path: Output PDF path
        dpi: Resolution for the PDF (default: 150)
        verbose: Print progress information

    Returns:
        True if successful, False otherwise
    """
    if not image_paths:
        print("Error: No image files found")
        return False

    if verbose:
        print(f"Combining {len(image_paths)} images into PDF...")

    images = []
    try:
        # Load all images; a single unreadable image aborts the whole run.
        for i, img_path in enumerate(image_paths):
            try:
                img = _load_rgb_image(img_path)
            except Exception as e:
                print(f"Error loading {img_path}: {e}")
                return False

            images.append(img)
            if verbose:
                print(f" [{i+1}/{len(image_paths)}] Loaded: {img_path.name} ({img.size[0]}x{img.size[1]})")

        # Create output directory if needed
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Save as PDF: the first image drives the save, the rest are appended
        # as further pages.
        try:
            images[0].save(
                output_path,
                "PDF",
                resolution=dpi,
                save_all=True,
                append_images=images[1:]
            )

            if verbose:
                print(f"\n✓ PDF created: {output_path}")
                print(f" Total slides: {len(images)}")
                file_size = output_path.stat().st_size
                if file_size > 1024 * 1024:
                    print(f" File size: {file_size / (1024 * 1024):.1f} MB")
                else:
                    print(f" File size: {file_size / 1024:.1f} KB")

            return True
        except Exception as e:
            print(f"Error creating PDF: {e}")
            return False
    finally:
        # Close all images, including those loaded before a failure.
        for img in images:
            img.close()
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: gather slide images and write one PDF.

    Exits with status 0 when the PDF was created and with status 1 when no
    images were found or the PDF could not be written.
    """
    help_footer = """
Examples:
# Combine PNG files using glob pattern
python slides_to_pdf.py slides/*.png -o presentation.pdf

# Combine specific files in order
python slides_to_pdf.py title.png intro.png methods.png results.png -o talk.pdf

# Combine all images from a directory (sorted by filename)
python slides_to_pdf.py slides/ -o presentation.pdf

# With custom DPI and verbose output
python slides_to_pdf.py slides/*.png -o presentation.pdf --dpi 200 -v

Supported formats: PNG, JPG, JPEG, GIF, WEBP, BMP

Tips:
- Name your slide images with numbers for correct ordering:
01_title.png, 02_intro.png, 03_methods.png, etc.
- Use the generate_slide_image.py script to create slides first
- Standard presentation aspect ratio is 16:9 (1920x1080 or 1280x720)
"""

    cli = argparse.ArgumentParser(
        description="Combine slide images into a single PDF presentation",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=help_footer,
    )
    cli.add_argument("images", nargs="+",
                     help="Image files, directories, or glob patterns")
    cli.add_argument("-o", "--output", required=True,
                     help="Output PDF file path")
    cli.add_argument("--dpi", type=int, default=150,
                     help="PDF resolution in DPI (default: 150)")
    cli.add_argument("-v", "--verbose", action="store_true",
                     help="Verbose output")

    opts = cli.parse_args()

    # Resolve the positional arguments to concrete image paths.
    slides = get_image_files(opts.images)
    if not slides:
        for line in (
            "Error: No image files found matching the specified paths",
            "\nUsage examples:",
            " python slides_to_pdf.py slides/*.png -o presentation.pdf",
            " python slides_to_pdf.py slide1.png slide2.png -o presentation.pdf",
        ):
            print(line)
        sys.exit(1)

    print(f"Found {len(slides)} image(s)")
    if opts.verbose:
        for slide in slides:
            print(f" - {slide}")

    # Combine into PDF
    pdf_path = Path(opts.output)
    created = combine_images_to_pdf(
        slides,
        pdf_path,
        dpi=opts.dpi,
        verbose=opts.verbose,
    )

    if not created:
        print("\n✗ Failed to create PDF")
        sys.exit(1)

    print(f"\n✓ PDF created: {pdf_path}")
    sys.exit(0)


if __name__ == "__main__":
    main()
|
||||
403
scripts/validate_presentation.py
Normal file
403
scripts/validate_presentation.py
Normal file
@@ -0,0 +1,403 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Presentation Validation Script
|
||||
|
||||
Validates scientific presentations for common issues:
|
||||
- Slide count vs. duration
|
||||
- LaTeX compilation
|
||||
- File size checks
|
||||
- Basic format validation
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
|
||||
# Try to import PyPDF2 for PDF analysis
|
||||
try:
|
||||
import PyPDF2
|
||||
HAS_PYPDF2 = True
|
||||
except ImportError:
|
||||
HAS_PYPDF2 = False
|
||||
|
||||
# Try to import python-pptx for PowerPoint analysis
|
||||
try:
|
||||
from pptx import Presentation
|
||||
HAS_PPTX = True
|
||||
except ImportError:
|
||||
HAS_PPTX = False
|
||||
|
||||
|
||||
class PresentationValidator:
    """Validates presentations for common issues.

    Findings are collected in three lists: ``info`` (neutral facts),
    ``warnings`` (worth a look, do not fail validation) and ``issues``
    (any entry makes the result invalid).
    """

    # Recommended slide counts by duration (min, recommended, max)
    SLIDE_GUIDELINES = {
        5: (5, 6, 8),
        10: (8, 11, 14),
        15: (13, 16, 20),
        20: (18, 22, 26),
        30: (22, 27, 33),
        45: (32, 40, 50),
        60: (40, 52, 65),
    }

    def __init__(self, filepath: str, duration: Optional[int] = None):
        """Remember the file to check and the planned talk length.

        Args:
            filepath: Path to the presentation (.pdf, .pptx/.ppt or .tex).
            duration: Talk length in minutes; the slide-count check is
                skipped when it is None or 0.
        """
        self.filepath = Path(filepath)
        self.duration = duration
        self.file_type = self.filepath.suffix.lower()
        self.issues = []
        self.warnings = []
        self.info = []
        # Reason the last LaTeX compile could not run at all (tool missing,
        # timeout); None when it ran, whatever its exit status.
        self._latex_failure: Optional[str] = None

    def validate(self) -> Dict:
        """Run all validations and return results."""
        print(f"Validating: {self.filepath.name}")
        print(f"File type: {self.file_type}")
        print("=" * 60)

        # Check file exists
        if not self.filepath.exists():
            self.issues.append(f"File not found: {self.filepath}")
            return self._format_results()

        # File size check
        self._check_file_size()

        # Type-specific validation
        if self.file_type == '.pdf':
            self._validate_pdf()
        elif self.file_type in ['.pptx', '.ppt']:
            self._validate_pptx()
        elif self.file_type in ['.tex']:
            self._validate_latex()
        else:
            self.warnings.append(f"Unknown file type: {self.file_type}")

        return self._format_results()

    def _check_file_size(self):
        """Check if file size is reasonable (warn above 50 MB, fail above 100 MB)."""
        size_mb = self.filepath.stat().st_size / (1024 * 1024)
        self.info.append(f"File size: {size_mb:.2f} MB")

        if size_mb > 100:
            self.issues.append(
                f"File is very large ({size_mb:.1f} MB). "
                "Consider compressing images."
            )
        elif size_mb > 50:
            self.warnings.append(
                f"File is large ({size_mb:.1f} MB). "
                "May be slow to email or upload."
            )

    def _validate_pdf(self):
        """Validate PDF presentation: page count and first-page dimensions."""
        if not HAS_PYPDF2:
            self.warnings.append(
                "PyPDF2 not installed. Install with: pip install PyPDF2"
            )
            return

        try:
            with open(self.filepath, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                num_pages = len(reader.pages)

                self.info.append(f"Number of slides: {num_pages}")

                # An empty document has no first page to measure; report it
                # directly instead of surfacing an index error.
                if num_pages == 0:
                    self.issues.append("PDF contains no pages.")
                    return

                # Check slide count against duration
                if self.duration:
                    self._check_slide_count(num_pages)

                # Get page size
                first_page = reader.pages[0]
                media_box = first_page.mediabox
                width = float(media_box.width)
                height = float(media_box.height)

                # Convert points to inches (72 points = 1 inch)
                width_in = width / 72
                height_in = height / 72
                aspect = width / height

                self.info.append(
                    f"Slide dimensions: {width_in:.1f}\" × {height_in:.1f}\" "
                    f"(aspect ratio: {aspect:.2f})"
                )

                # Check common aspect ratios
                if abs(aspect - 16/9) < 0.01:
                    self.info.append("Aspect ratio: 16:9 (widescreen)")
                elif abs(aspect - 4/3) < 0.01:
                    self.info.append("Aspect ratio: 4:3 (standard)")
                else:
                    self.warnings.append(
                        f"Unusual aspect ratio: {aspect:.2f}. "
                        "Confirm this matches venue requirements."
                    )

        except Exception as e:
            self.issues.append(f"Error reading PDF: {str(e)}")

    def _validate_pptx(self):
        """Validate PowerPoint presentation: slide count, dimensions, content."""
        if not HAS_PPTX:
            self.warnings.append(
                "python-pptx not installed. Install with: pip install python-pptx"
            )
            return

        try:
            prs = Presentation(self.filepath)
            num_slides = len(prs.slides)

            self.info.append(f"Number of slides: {num_slides}")

            # Check slide count against duration
            if self.duration:
                self._check_slide_count(num_slides)

            # Get slide dimensions
            width_inches = prs.slide_width / 914400  # EMU to inches
            height_inches = prs.slide_height / 914400
            aspect = prs.slide_width / prs.slide_height

            self.info.append(
                f"Slide dimensions: {width_inches:.1f}\" × {height_inches:.1f}\" "
                f"(aspect ratio: {aspect:.2f})"
            )

            # Check fonts and text
            self._check_pptx_content(prs)

        except Exception as e:
            self.issues.append(f"Error reading PowerPoint: {str(e)}")

    def _check_pptx_content(self, prs):
        """Check PowerPoint content for small fonts and bullet-heavy slides."""
        small_text_slides = set()
        many_bullets_slides = set()

        for idx, slide in enumerate(prs.slides, start=1):
            for shape in slide.shapes:
                if not shape.has_text_frame:
                    continue

                paragraphs = shape.text_frame.paragraphs

                # Check for small fonts; runs without an explicit size
                # (font.size is None) are not flagged.
                if any(
                    run.font.size and run.font.size.pt < 18
                    for paragraph in paragraphs
                    for run in paragraph.runs
                ):
                    small_text_slides.add(idx)

                # Check for too many bullets (top-level paragraphs only)
                bullet_count = sum(1 for p in paragraphs if p.level == 0)
                if bullet_count > 6:
                    many_bullets_slides.add(idx)

        # Report issues, listing at most the first five slide numbers
        if small_text_slides:
            unique_slides = sorted(small_text_slides)
            self.warnings.append(
                f"Small text (<18pt) found on slides: {unique_slides[:5]}"
                + (" ..." if len(unique_slides) > 5 else "")
            )

        if many_bullets_slides:
            unique_slides = sorted(many_bullets_slides)
            self.warnings.append(
                f"Many bullets (>6) on slides: {unique_slides[:5]}"
                + (" ..." if len(unique_slides) > 5 else "")
            )

    def _validate_latex(self):
        """Validate LaTeX Beamer presentation by compiling it and checking the PDF."""
        self.info.append("LaTeX source file detected")

        # Try to compile
        if self._try_compile_latex():
            self.info.append("LaTeX compilation: SUCCESS")

            # If PDF was generated, validate it
            pdf_path = self.filepath.with_suffix('.pdf')
            if pdf_path.exists():
                pdf_validator = PresentationValidator(str(pdf_path), self.duration)
                pdf_results = pdf_validator.validate()

                # Merge results
                self.info.extend(pdf_results['info'])
                self.warnings.extend(pdf_results['warnings'])
                self.issues.extend(pdf_results['issues'])
        else:
            # Only point at the .log file when pdflatex actually ran; a
            # missing tool or a timeout gets its own explanation.
            self.issues.append(
                self._latex_failure
                or "LaTeX compilation failed. Check .log file for errors."
            )

    def _try_compile_latex(self) -> bool:
        """Try to compile LaTeX file.

        Returns:
            True when pdflatex exits with status 0. When pdflatex cannot be
            started or exceeds the 60-second limit, returns False and stores
            the reason in ``self._latex_failure``.
        """
        self._latex_failure = None
        try:
            # Run in the source directory so relative includes resolve.
            result = subprocess.run(
                ['pdflatex', '-interaction=nonstopmode', self.filepath.name],
                cwd=self.filepath.parent,
                capture_output=True,
                timeout=60
            )
        except FileNotFoundError:
            self._latex_failure = (
                "pdflatex could not be started (not installed or not on PATH). "
                "Install a TeX distribution to validate LaTeX sources."
            )
            return False
        except subprocess.TimeoutExpired:
            self._latex_failure = "LaTeX compilation timed out after 60 seconds."
            return False
        return result.returncode == 0

    def _check_slide_count(self, num_slides: int):
        """Check if slide count is appropriate for duration."""
        if self.duration not in self.SLIDE_GUIDELINES:
            # Find nearest duration
            durations = sorted(self.SLIDE_GUIDELINES.keys())
            nearest = min(durations, key=lambda x: abs(x - self.duration))
            min_slides, rec_slides, max_slides = self.SLIDE_GUIDELINES[nearest]
            self.info.append(
                f"Using guidelines for {nearest}-minute talk "
                f"(closest to {self.duration} minutes)"
            )
        else:
            min_slides, rec_slides, max_slides = self.SLIDE_GUIDELINES[self.duration]

        self.info.append(
            f"Recommended slides for {self.duration}-minute talk: "
            f"{min_slides}-{max_slides} (optimal: ~{rec_slides})"
        )

        if num_slides < min_slides:
            self.warnings.append(
                f"Fewer slides ({num_slides}) than recommended ({min_slides}-{max_slides}). "
                "May have too much time or too little content."
            )
        elif num_slides > max_slides:
            self.warnings.append(
                f"More slides ({num_slides}) than recommended ({min_slides}-{max_slides}). "
                "Likely to run over time."
            )
        else:
            self.info.append(
                f"Slide count ({num_slides}) is within recommended range."
            )

    def _format_results(self) -> Dict:
        """Format validation results; ``valid`` is True when no issues were recorded."""
        return {
            'filepath': str(self.filepath),
            'file_type': self.file_type,
            'info': self.info,
            'warnings': self.warnings,
            'issues': self.issues,
            'valid': len(self.issues) == 0
        }
|
||||
|
||||
|
||||
def print_results(results: Dict):
    """Write a human-readable validation report to standard output.

    Args:
        results: Mapping with the list entries ``info``, ``warnings`` and
            ``issues`` plus the boolean ``valid``. Empty lists produce no
            section in the report.
    """
    rule = "=" * 60

    print()
    print(rule)
    print("VALIDATION RESULTS")
    print(rule)

    # Sections are printed in this fixed order; each is skipped when empty.
    sections = (
        ('info', "\n📋 Information:"),
        ('warnings', "\n⚠️ Warnings:"),
        ('issues', "\n❌ Issues:"),
    )
    for key, heading in sections:
        entries = results[key]
        if not entries:
            continue
        print(heading)
        for entry in entries:
            print(f" • {entry}")

    # Overall status
    print("\n" + rule)
    if not results['valid']:
        print("❌ Validation FAILED")
        print(f" ({len(results['issues'])} issue(s) found)")
    else:
        print("✅ Validation PASSED")
        if results['warnings']:
            print(f" ({len(results['warnings'])} warning(s) found)")
    print(rule)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: validate one presentation file.

    Exits with status 0 when the validation result is valid and with
    status 1 otherwise.
    """
    help_footer = """
Examples:
%(prog)s presentation.pdf --duration 15
%(prog)s slides.pptx --duration 45
%(prog)s beamer_talk.tex --duration 20

Supported file types:
- PDF (.pdf)
- PowerPoint (.pptx, .ppt)
- LaTeX Beamer (.tex)

Validation checks:
- Slide count vs. duration
- File size
- Slide dimensions
- Font sizes (PowerPoint)
- LaTeX compilation (Beamer)
"""

    cli = argparse.ArgumentParser(
        description='Validate scientific presentations',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=help_footer,
    )
    cli.add_argument('filepath',
                     help='Path to presentation file (PDF, PPTX, or TEX)')
    cli.add_argument('--duration', '-d', type=int,
                     help='Presentation duration in minutes')
    cli.add_argument('--quiet', '-q', action='store_true',
                     help='Only show issues and warnings')

    opts = cli.parse_args()

    # Validate
    report = PresentationValidator(opts.filepath, opts.duration).validate()

    # In quiet mode the full report is shown only when there is something
    # to flag; otherwise a one-line confirmation is printed.
    if not opts.quiet or report['warnings'] or report['issues']:
        print_results(report)
    else:
        print("✅ No issues found")

    # Exit with appropriate code
    sys.exit(0 if report['valid'] else 1)


if __name__ == '__main__':
    main()
|
||||
|
||||
Reference in New Issue
Block a user