From 90de96a99b2935a22ffb9040291bd1d661bc86dd Mon Sep 17 00:00:00 2001 From: Timothy Kassis Date: Sun, 30 Nov 2025 09:30:58 -0500 Subject: [PATCH] Add support for generating scientific illustrations using Nano Banana Pro and Flux.2 Pro --- .claude-plugin/marketplace.json | 3 +- README.md | 11 +- docs/examples.md | 147 +++++++++ docs/scientific-skills.md | 1 + scientific-skills/generate-image/SKILL.md | 124 ++++++++ .../generate-image/scripts/generate_image.py | 281 ++++++++++++++++++ 6 files changed, 561 insertions(+), 6 deletions(-) create mode 100644 scientific-skills/generate-image/SKILL.md create mode 100755 scientific-skills/generate-image/scripts/generate_image.py diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index f2b31c9..5dbee73 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -7,7 +7,7 @@ }, "metadata": { "description": "Claude scientific skills from K-Dense Inc", - "version": "2.6.0" + "version": "2.7.0" }, "plugins": [ { @@ -105,6 +105,7 @@ "./scientific-skills/ensembl-database", "./scientific-skills/fda-database", "./scientific-skills/gene-database", + "./scientific-skills/generate-image", "./scientific-skills/geo-database", "./scientific-skills/gwas-database", "./scientific-skills/hmdb-database", diff --git a/README.md b/README.md index bd20cb0..c7d1f1d 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # Claude Scientific Skills [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE.md) -[![Skills](https://img.shields.io/badge/Skills-127-brightgreen.svg)](#whats-included) +[![Skills](https://img.shields.io/badge/Skills-128-brightgreen.svg)](#whats-included) -A comprehensive collection of **127+ ready-to-use scientific skills** for Claude, created by the K-Dense team. Transform Claude into your AI research assistant capable of executing complex multi-step scientific workflows across biology, chemistry, medicine, and beyond. 
+A comprehensive collection of **128+ ready-to-use scientific skills** for Claude, created by the K-Dense team. Transform Claude into your AI research assistant capable of executing complex multi-step scientific workflows across biology, chemistry, medicine, and beyond. These skills enable Claude to seamlessly work with specialized scientific libraries, databases, and tools across multiple scientific domains: - 🧬 Bioinformatics & Genomics - Sequence analysis, single-cell RNA-seq, gene regulatory networks, variant annotation, phylogenetic analysis @@ -32,7 +32,7 @@ These skills enable Claude to seamlessly work with specialized scientific librar ## 📦 What's Included -This repository provides **127+ scientific skills** organized into the following categories: +This repository provides **128+ scientific skills** organized into the following categories: - **26+ Scientific Databases** - Direct API access to OpenAlex, PubMed, ChEMBL, UniProt, COSMIC, ClinicalTrials.gov, and more - **54+ Python Packages** - RDKit, Scanpy, PyTorch Lightning, scikit-learn, BioPython, PennyLane, Qiskit, and others @@ -335,7 +335,7 @@ networks, and search GEO for similar patterns. ## 📚 Available Skills -This repository contains **122+ scientific skills** organized across multiple domains. Each skill provides comprehensive documentation, code examples, and best practices for working with scientific libraries, databases, and tools. +This repository contains **128+ scientific skills** organized across multiple domains. Each skill provides comprehensive documentation, code examples, and best practices for working with scientific libraries, databases, and tools. 
### Skill Categories @@ -407,12 +407,13 @@ This repository contains **122+ scientific skills** organized across multiple do - Protein language models: ESM - Cloud laboratory platform: Adaptyv (automated protein testing and validation) -#### 📚 **Scientific Communication** (9+ skills) +#### 📚 **Scientific Communication** (10+ skills) - Literature: OpenAlex, PubMed, Literature Review - Web search: Perplexity Search (AI-powered search with real-time information) - Writing: Scientific Writing, Peer Review - Document processing: DOCX, PDF, PPTX, XLSX, MarkItDown - Publishing: Paper-2-Web +- Illustration: Generate Image (AI image generation with FLUX.2 Pro and Gemini 3 Pro) #### 🔬 **Scientific Databases** (26+ skills) - Protein: UniProt, PDB, AlphaFold DB diff --git a/docs/examples.md b/docs/examples.md index c38f32c..8ef53e0 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -25,6 +25,7 @@ This document provides comprehensive, practical examples demonstrating how to co 17. [Clinical Research & Real-World Evidence](#clinical-research--real-world-evidence) 18. [Experimental Physics & Data Analysis](#experimental-physics--data-analysis) 19. [Chemical Engineering & Process Optimization](#chemical-engineering--process-optimization) +20. [Scientific Illustration & Visual Communication](#scientific-illustration--visual-communication) --- @@ -2490,6 +2491,152 @@ Expected Output: --- +## Scientific Illustration & Visual Communication + +### Example 20: Creating Publication-Ready Scientific Figures + +**Objective**: Generate and refine scientific illustrations, diagrams, and graphical abstracts for publications and presentations. 
+ +**Skills Used**: +- `generate-image` - AI image generation and editing +- `matplotlib` - Data visualization +- `scientific-visualization` - Best practices +- `scientific-writing` - Figure caption creation +- `reportlab` - PDF report generation + +**Workflow**: + +```bash +Step 1: Plan visual communication strategy +- Identify key concepts that need visual representation: + * Experimental workflow diagrams + * Molecular structures and interactions + * Data visualization (handled by matplotlib) + * Conceptual illustrations for mechanisms + * Graphical abstract for paper summary +- Determine appropriate style for target journal/audience +- Sketch rough layouts for each figure + +Step 2: Generate experimental workflow diagram +- Use generate-image skill with detailed prompt: + "Scientific illustration showing a step-by-step experimental + workflow for CRISPR gene editing: (1) guide RNA design at computer, + (2) cell culture in petri dish, (3) electroporation device, + (4) selection with antibiotics, (5) sequencing validation. + Clean, professional style with numbered steps, white background, + suitable for scientific publication." +- Save as workflow_diagram.png +- Review and iterate on prompt if needed + +Step 3: Create molecular interaction schematic +- Generate detailed molecular visualization: + "Scientific diagram of protein-ligand binding mechanism: + show receptor protein (blue ribbon structure) with binding pocket, + small molecule ligand (ball-and-stick, orange) approaching, + key hydrogen bonds indicated with dashed lines, water molecules + in binding site. Professional biochemistry illustration style, + clean white background, publication quality." 
+- Generate multiple versions with different angles/styles +- Select best representation + +Step 4: Edit existing figures for consistency +- Load existing figure that needs modification: + python scripts/generate_image.py "Change the background to white + and make the protein blue instead of green" --input figure1.png +- Standardize color schemes across all figures +- Edit to match journal style guidelines: + python scripts/generate_image.py "Remove the title text and + increase contrast for print publication" --input diagram.png + +Step 5: Generate graphical abstract +- Create comprehensive visual summary: + "Graphical abstract for cancer immunotherapy paper: left side + shows tumor cells (irregular shapes, red) being attacked by + T cells (round, blue). Center shows the drug molecule structure. + Right side shows healthy tissue (green). Arrow flow from left + to right indicating treatment progression. Modern, clean style + with minimal text, high contrast, suitable for journal TOC." +- Ensure dimensions meet journal requirements +- Iterate to highlight key findings + +Step 6: Create conceptual mechanism illustrations +- Generate mechanism diagrams: + "Scientific illustration of enzyme catalysis mechanism: + Show substrate entering active site (step 1), transition state + formation with electron movement arrows (step 2), product + release (step 3). Use standard biochemistry notation, + curved arrows for electron movement, clear labeling." +- Generate alternative representations for supplementary materials + +Step 7: Produce presentation-ready figures +- Create high-impact visuals for talks: + "Eye-catching scientific illustration of DNA double helix + unwinding during replication, with DNA polymerase (large + green structure) adding nucleotides. Dynamic composition, + vibrant but professional colors, dark background for + presentation slides." 
+- Adjust style for poster vs slide format +- Create versions at different resolutions + +Step 8: Generate figure panels for multi-part figures +- Create consistent series of related images: + "Panel A: Normal cell with intact membrane (green outline) + Panel B: Cell under oxidative stress with damaged membrane + Panel C: Cell treated with antioxidant, membrane recovering + Consistent style across all panels, same scale, white background, + scientific illustration style suitable for publication." +- Ensure visual consistency across panels +- Annotate with panel labels + +Step 9: Edit for accessibility +- Modify figures for colorblind accessibility: + python scripts/generate_image.py "Change the red and green + elements to blue and orange for colorblind accessibility, + maintain all other aspects" --input figure_v1.png +- Add patterns or textures for additional differentiation +- Verify contrast meets accessibility standards + +Step 10: Create supplementary visual materials +- Generate additional context figures: + "Anatomical diagram showing location of pancreatic islets + within the pancreas, cross-section view with labeled structures: + alpha cells, beta cells, blood vessels. Medical illustration + style, educational, suitable for supplementary materials." 
+- Create protocol flowcharts and decision trees +- Generate equipment setup diagrams + +Step 11: Compile figure legends and captions +- Use scientific-writing skill to create descriptions: + * Figure number and title + * Detailed description of what is shown + * Explanation of symbols, colors, and abbreviations + * Scale bars and measurement units + * Statistical information if applicable +- Format according to journal guidelines + +Step 12: Assemble final publication package +- Organize all figures in publication order +- Create high-resolution exports (300+ DPI for print) +- Generate both RGB (web) and CMYK (print) versions +- Compile into PDF using ReportLab: + * Title page with graphical abstract + * All figures with captions + * Supplementary figures section +- Create separate folder with individual figure files +- Document all generation prompts for reproducibility + +Expected Output: +- Complete set of publication-ready scientific illustrations +- Graphical abstract for table of contents +- Mechanism diagrams and workflow figures +- Edited versions meeting journal style guidelines +- Accessibility-compliant figure versions +- Figure package with captions and metadata +- Documentation of prompts used for reproducibility +``` + +--- + ## Summary These examples demonstrate: diff --git a/docs/scientific-skills.md b/docs/scientific-skills.md index 3d78855..8ebb04c 100644 --- a/docs/scientific-skills.md +++ b/docs/scientific-skills.md @@ -154,6 +154,7 @@ - **HypoGeniC** - Automated hypothesis generation and testing using large language models to accelerate scientific discovery. Provides three frameworks: HypoGeniC (data-driven hypothesis generation from observational data), HypoRefine (synergistic approach combining literature insights with empirical patterns through an agentic system), and Union methods (mechanistic combination of literature and data-driven hypotheses). 
Features iterative refinement that improves hypotheses by learning from challenging examples, Redis caching for API cost reduction, and customizable YAML-based prompt templates. Includes command-line tools for generation (hypogenic_generation) and testing (hypogenic_inference). Research applications have demonstrated 14.19% accuracy improvement in AI-content detection and 7.44% in deception detection. Use cases: deception detection in reviews, AI-generated content identification, mental stress detection, exploratory research without existing literature, hypothesis-driven analysis in novel domains, and systematic exploration of competing explanations ### Scientific Communication & Publishing +- **Generate Image** - AI-powered image generation and editing for scientific illustrations, schematics, and visualizations using OpenRouter's image generation models. Supports multiple models including google/gemini-3-pro-image-preview (high quality, recommended default) and black-forest-labs/flux.2-pro (fast, high quality). Key features include: text-to-image generation from detailed prompts, image editing capabilities (modify existing images with natural language instructions), automatic base64 encoding/decoding, PNG output with configurable paths, and comprehensive error handling. Requires OpenRouter API key (via .env file or environment variable). Use cases: generating scientific diagrams and illustrations, creating publication-quality figures, editing existing images (changing colors, adding elements, removing backgrounds), producing schematics for papers and presentations, visualizing experimental setups, creating graphical abstracts, and generating conceptual illustrations for scientific communication - **Paper-2-Web** - Autonomous pipeline for transforming academic papers into multiple promotional formats using the Paper2All system. 
Converts LaTeX or PDF papers into: (1) Paper2Web - interactive, layout-aware academic homepages with responsive design, interactive figures, and mobile support; (2) Paper2Video - professional presentation videos with slides, narration, cursor movements, and optional talking-head generation using Hallo2; (3) Paper2Poster - print-ready conference posters with custom dimensions, professional layouts, and institution branding. Supports GPT-4/GPT-4.1 models, batch processing, QR code generation, multi-language content, and quality assessment metrics. Use cases: conference materials, video abstracts, preprint enhancement, research promotion, poster sessions, and academic website creation - **Perplexity Search** - AI-powered web search using Perplexity models via LiteLLM and OpenRouter for real-time, web-grounded answers with source citations. Provides access to multiple Perplexity models: Sonar Pro (general-purpose, best cost-quality balance), Sonar Pro Search (most advanced agentic search with multi-step reasoning), Sonar (cost-effective for simple queries), Sonar Reasoning Pro (advanced step-by-step analysis), and Sonar Reasoning (basic reasoning). Key features include: single OpenRouter API key setup (no separate Perplexity account), real-time access to current information beyond training data cutoff, comprehensive query design guidance (domain-specific patterns, time constraints, source preferences), cost optimization strategies with usage monitoring, programmatic and CLI interfaces, batch processing support, and integration with other scientific skills. Installation uses uv pip for LiteLLM, with detailed setup, troubleshooting, and security documentation. 
Use cases: finding recent scientific publications and research, conducting literature searches across domains, verifying facts with source citations, accessing current developments in any field, comparing technologies and approaches, performing domain-specific research (biomedical, clinical, technical), supplementing PubMed searches with real-time web results, and discovering latest developments post-database indexing diff --git a/scientific-skills/generate-image/SKILL.md b/scientific-skills/generate-image/SKILL.md new file mode 100644 index 0000000..74bddd6 --- /dev/null +++ b/scientific-skills/generate-image/SKILL.md @@ -0,0 +1,124 @@ +--- +name: generate-image +description: Generate or edit scientific illustrations, schematics and images. Also use if the user mentions specific models like "Flux" or "Nano Banana". +--- + +# Generate Image + +Generate and edit high-quality images using OpenRouter's image generation models including FLUX.2 Pro and Nano Banana Pro (Gemini 3 Pro). + +## Quick Start + +Use the `scripts/generate_image.py` script to generate or edit images: + +```bash +# Generate a new image +python scripts/generate_image.py "A beautiful sunset over mountains" + +# Edit an existing image +python scripts/generate_image.py "Make the sky purple" --input photo.jpg +``` + +This generates/edits an image and saves it as `generated_image.png` in the current directory. + +## API Key Setup + +**CRITICAL**: The script requires an OpenRouter API key. Before running, check if the user has configured their API key: + +1. Look for a `.env` file in the project directory or parent directories +2. Check for `OPENROUTER_API_KEY=` in the `.env` file +3. 
If not found, inform the user they need to: + - Create a `.env` file with `OPENROUTER_API_KEY=your-api-key-here` + - Or set the environment variable: `export OPENROUTER_API_KEY=your-api-key-here` + - Get an API key from: https://openrouter.ai/keys + +The script will automatically detect the `.env` file and provide clear error messages if the API key is missing. + +## Model Selection + +**Default model**: `google/gemini-3-pro-image-preview` (high quality, recommended) + +**Available models for generation and editing**: +- `google/gemini-3-pro-image-preview` - High quality, supports generation + editing +- `black-forest-labs/flux.2-pro` - Fast, high quality, supports generation + editing + +**Generation only**: +- `black-forest-labs/flux.2-dev` - Development version, generation only + +Select based on: +- **Quality**: Use gemini-3-pro or flux.2-pro +- **Editing**: Use gemini-3-pro or flux.2-pro (both support image editing) +- **Cost**: Use flux.2-dev for generation only + +## Common Usage Patterns + +### Basic generation +```bash +python scripts/generate_image.py "Your prompt here" +``` + +### Specify model +```bash +python scripts/generate_image.py "A cat in space" --model "black-forest-labs/flux.2-pro" +``` + +### Custom output path +```bash +python scripts/generate_image.py "Abstract art" --output artwork.png +``` + +### Edit an existing image +```bash +python scripts/generate_image.py "Make the background blue" --input photo.jpg +``` + +### Edit with a specific model +```bash +python scripts/generate_image.py "Add sunglasses to the person" --input portrait.png --model "black-forest-labs/flux.2-pro" +``` + +### Edit with custom output +```bash +python scripts/generate_image.py "Remove the text from the image" --input screenshot.png --output cleaned.png +``` + +### Multiple images +Run the script multiple times with different prompts or output paths: +```bash +python scripts/generate_image.py "Image 1 description" --output image1.png +python 
scripts/generate_image.py "Image 2 description" --output image2.png +``` + +## Script Parameters + +- `prompt` (required): Text description of the image to generate, or editing instructions +- `--input` or `-i`: Input image path for editing (enables edit mode) +- `--model` or `-m`: OpenRouter model ID (default: google/gemini-3-pro-image-preview) +- `--output` or `-o`: Output file path (default: generated_image.png) +- `--api-key`: OpenRouter API key (overrides .env file) + +## Error Handling + +The script provides clear error messages for: +- Missing API key (with setup instructions) +- API errors (with status codes) +- Unexpected response formats +- Missing dependencies (requests library) + +If the script fails, read the error message and address the issue before retrying. + +## Notes + +- Images are returned as base64-encoded data URLs and automatically saved as PNG files +- The script supports both `images` and `content` response formats from different OpenRouter models +- Generation time varies by model (typically 5-30 seconds) +- For image editing, the input image is encoded as base64 and sent to the model +- Supported input image formats: PNG, JPEG, GIF, WebP +- Check OpenRouter pricing for cost information: https://openrouter.ai/models + +## Image Editing Tips + +- Be specific about what changes you want (e.g., "change the sky to sunset colors" vs "edit the sky") +- Reference specific elements in the image when possible +- For best results, use clear and detailed editing instructions +- Both Gemini 3 Pro and FLUX.2 Pro support image editing through OpenRouter diff --git a/scientific-skills/generate-image/scripts/generate_image.py b/scientific-skills/generate-image/scripts/generate_image.py new file mode 100755 index 0000000..e758364 --- /dev/null +++ b/scientific-skills/generate-image/scripts/generate_image.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python3 +""" +Generate and edit images using OpenRouter API with various image generation models. 
# Generate and edit images through the OpenRouter chat-completions API.
#
# Supported models include:
# - google/gemini-3-pro-image-preview (generation and editing)
# - black-forest-labs/flux.2-pro (generation and editing)
# - black-forest-labs/flux.2-dev (generation)
# - and other image generation models available on OpenRouter
#
# For image editing, provide an input image along with an editing prompt.

import argparse
import base64
import json
import os
import sys
from pathlib import Path
from typing import Optional

API_URL = "https://openrouter.ai/api/v1/chat/completions"
API_KEY_ENV_VAR = "OPENROUTER_API_KEY"
DEFAULT_MODEL = "google/gemini-3-pro-image-preview"
DEFAULT_OUTPUT = "generated_image.png"
# Image generation is slow, but a request must never hang forever.
DEFAULT_TIMEOUT = 180.0


def check_env_file() -> Optional[str]:
    """Return OPENROUTER_API_KEY from the nearest ``.env`` file, if any.

    The current working directory is searched first, then each parent
    directory in turn. The first non-empty value found wins.

    Accepted line shapes are ``OPENROUTER_API_KEY=value``,
    ``export OPENROUTER_API_KEY=value`` and ``OPENROUTER_API_KEY = value``,
    with optional leading whitespace and optional single or double quotes
    around the value. A ``.env`` that cannot be read or decoded is skipped
    instead of aborting the search.

    Returns:
        The API key, or ``None`` when no ``.env`` file provides one.
    """
    current_dir = Path.cwd()
    for directory in (current_dir, *current_dir.parents):
        env_file = directory / ".env"
        if not env_file.is_file():
            continue
        try:
            # utf-8-sig transparently drops a BOM written by some editors.
            text = env_file.read_text(encoding="utf-8-sig")
        except (OSError, UnicodeDecodeError):
            continue
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if line.startswith("export "):
                line = line[len("export "):].lstrip()
            name, separator, value = line.partition("=")
            if separator and name.strip() == API_KEY_ENV_VAR:
                api_key = value.strip().strip('"').strip("'")
                if api_key:
                    return api_key
    return None


def resolve_api_key(api_key: Optional[str] = None) -> Optional[str]:
    """Resolve the OpenRouter API key from every documented source.

    Precedence: explicit ``api_key`` argument, then the
    ``OPENROUTER_API_KEY`` environment variable, then the nearest ``.env``
    file (see ``check_env_file``).

    Args:
        api_key: Key supplied by the caller, or ``None``.

    Returns:
        The resolved key, or ``None`` when no source provides one.
    """
    if api_key:
        return api_key
    from_environment = os.environ.get(API_KEY_ENV_VAR, "").strip()
    if from_environment:
        return from_environment
    return check_env_file()


def load_image_as_base64(image_path: str) -> str:
    """Load an image file and return it as a base64 ``data:`` URL.

    The MIME type is chosen from the file extension (PNG, JPEG, GIF, WebP);
    unknown extensions fall back to ``image/png``.

    Args:
        image_path: Path of the image to encode.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Exits the process with status 1 when ``image_path`` is not a file.
    """
    path = Path(image_path)
    if not path.is_file():
        print(f"❌ Error: Image file not found: {image_path}")
        sys.exit(1)

    mime_types = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
    }
    mime_type = mime_types.get(path.suffix.lower(), 'image/png')

    base64_data = base64.b64encode(path.read_bytes()).decode('utf-8')
    return f"data:{mime_type};base64,{base64_data}"


def save_base64_image(base64_data: str, output_path: str) -> None:
    """Decode a base64 image (optionally a ``data:`` URL) and write it to disk.

    Missing parent directories of ``output_path`` are created.

    Args:
        base64_data: Raw base64 text, or a data URL whose payload follows
            the first comma.
        output_path: Destination file path.

    Raises:
        ValueError: If ``base64_data`` is a plain ``http(s)`` URL (which
            cannot be base64-decoded) or is not valid base64.
        OSError: If the destination cannot be written.
    """
    if base64_data.startswith(("http://", "https://")):
        # Decoding a remote URL as base64 would silently write garbage.
        raise ValueError(
            f"expected base64 image data but got a remote URL: {base64_data}"
        )

    # Remove data URL prefix if present
    if ',' in base64_data:
        base64_data = base64_data.split(',', 1)[1]

    image_data = base64.b64decode(base64_data)

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_bytes(image_data)


def _find_images(message: dict) -> list:
    """Return the image entries of a response message, or an empty list.

    Handles both the ``images`` field and the ``content``-as-list-of-parts
    format, accepting parts typed ``image`` or ``image_url``.
    """
    images = message.get("images")
    if isinstance(images, list) and images:
        return images
    content = message.get("content")
    if isinstance(content, list):
        return [
            part for part in content
            if isinstance(part, dict)
            and part.get("type") in ("image", "image_url")
        ]
    return []


def _image_url(image: object) -> Optional[str]:
    """Return the URL or base64 payload held by one image entry, if any."""
    if not isinstance(image, dict):
        return None
    nested = image.get("image_url")
    if isinstance(nested, dict) and isinstance(nested.get("url"), str):
        return nested["url"] or None
    if isinstance(nested, str) and nested:
        return nested
    url = image.get("url")
    if isinstance(url, str) and url:
        return url
    return None


def _first_message(result: object) -> Optional[dict]:
    """Return the first choice's message dict, or ``None`` if absent."""
    if not isinstance(result, dict):
        return None
    choices = result.get("choices")
    if not isinstance(choices, list) or not choices:
        return None
    first = choices[0]
    if not isinstance(first, dict):
        return None
    message = first.get("message")
    return message if isinstance(message, dict) else {}


def _response_has_image(result: object) -> bool:
    """Tell whether an API response contains an extractable image."""
    message = _first_message(result)
    if not message:
        return False
    images = _find_images(message)
    return bool(images) and _image_url(images[0]) is not None


def generate_image(
    prompt: str,
    model: str = DEFAULT_MODEL,
    output_path: str = DEFAULT_OUTPUT,
    api_key: Optional[str] = None,
    input_image: Optional[str] = None,
    timeout: float = DEFAULT_TIMEOUT,
) -> dict:
    """Generate or edit an image using the OpenRouter API.

    Args:
        prompt: Text description of the image to generate, or editing
            instructions when ``input_image`` is given.
        model: OpenRouter model ID.
        output_path: Path to save the generated image.
        api_key: OpenRouter API key. When omitted, the
            ``OPENROUTER_API_KEY`` environment variable and then the
            nearest ``.env`` file are consulted.
        input_image: Path to an input image for editing (optional).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        dict: Parsed JSON response from the OpenRouter API. The response is
        returned even when it contains no image; a warning is printed in
        that case.

    Exits the process with status 1 when ``requests`` is not installed, no
    API key can be found, the input image is missing, the request fails,
    the API answers with a non-200 status or non-JSON body, or the image
    cannot be decoded or written.
    """
    try:
        import requests
    except ImportError:
        print("Error: 'requests' library not found. Install with: pip install requests")
        sys.exit(1)

    api_key = resolve_api_key(api_key)
    if not api_key:
        print("❌ Error: OPENROUTER_API_KEY not found!")
        print("\nPlease create a .env file in your project directory with:")
        print("OPENROUTER_API_KEY=your-api-key-here")
        print("\nOr set the environment variable:")
        print("export OPENROUTER_API_KEY=your-api-key-here")
        print("\nGet your API key from: https://openrouter.ai/keys")
        sys.exit(1)

    if input_image is not None:
        print(f"✏️ Editing image with model: {model}")
        print(f"📷 Input image: {input_image}")
        print(f"📝 Edit prompt: {prompt}")

        # Editing sends a multimodal message: the instructions plus the
        # source image inlined as a data URL.
        message_content = [
            {"type": "text", "text": prompt},
            {
                "type": "image_url",
                "image_url": {"url": load_image_as_base64(input_image)},
            },
        ]
    else:
        print(f"🎨 Generating image with model: {model}")
        print(f"📝 Prompt: {prompt}")
        message_content = prompt

    try:
        response = requests.post(
            url=API_URL,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": model,
                "messages": [{"role": "user", "content": message_content}],
                "modalities": ["image", "text"],
            },
            timeout=timeout,
        )
    except requests.RequestException as exc:
        print(f"❌ Request failed: {exc}")
        sys.exit(1)

    if response.status_code != 200:
        print(f"❌ API Error ({response.status_code}): {response.text}")
        sys.exit(1)

    try:
        result = response.json()
    except ValueError:
        print(f"❌ API returned a non-JSON response: {response.text[:500]}")
        sys.exit(1)

    message = _first_message(result)
    if message is None:
        print("❌ No choices in response")
        print(f"Response: {json.dumps(result, indent=2)}")
        return result

    images = _find_images(message)
    if not images:
        print("⚠️ No image found in response")
        if message.get("content"):
            print(f"Response content: {message['content']}")
        return result

    # Only the first image is saved, matching the single output path.
    image_url = _image_url(images[0])
    if image_url is None:
        print(f"⚠️ Unexpected image format: {images[0]}")
        return result

    try:
        save_base64_image(image_url, output_path)
    except (ValueError, OSError) as exc:
        print(f"❌ Could not save image: {exc}")
        sys.exit(1)
    print(f"✅ Image saved to: {output_path}")

    return result


def main():
    """Parse command-line arguments and run one generation or edit.

    Exits with status 1 when the API response contained no usable image,
    so calling scripts and agents can detect the failure.
    """
    parser = argparse.ArgumentParser(
        description="Generate or edit images using OpenRouter API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Generate with default model (Gemini 3 Pro Image Preview)
  python generate_image.py "A beautiful sunset over mountains"

  # Use a specific model
  python generate_image.py "A cat in space" --model "black-forest-labs/flux.2-pro"

  # Specify output path
  python generate_image.py "Abstract art" --output my_image.png

  # Edit an existing image
  python generate_image.py "Make the sky purple" --input photo.jpg --output edited.png

  # Edit with a specific model
  python generate_image.py "Add a hat to the person" --input portrait.png -m "black-forest-labs/flux.2-pro"

Popular image models:
  - google/gemini-3-pro-image-preview (default, high quality, generation + editing)
  - black-forest-labs/flux.2-pro (fast, high quality, generation + editing)
  - black-forest-labs/flux.2-dev (development version)
        """
    )

    parser.add_argument(
        "prompt",
        type=str,
        help="Text description of the image to generate, or editing instructions"
    )

    parser.add_argument(
        "--model", "-m",
        type=str,
        default=DEFAULT_MODEL,
        help="OpenRouter model ID (default: google/gemini-3-pro-image-preview)"
    )

    parser.add_argument(
        "--output", "-o",
        type=str,
        default=DEFAULT_OUTPUT,
        help="Output file path (default: generated_image.png)"
    )

    parser.add_argument(
        "--input", "-i",
        type=str,
        help="Input image path for editing (enables edit mode)"
    )

    parser.add_argument(
        "--api-key",
        type=str,
        help="OpenRouter API key (falls back to the OPENROUTER_API_KEY "
             "environment variable, then .env)"
    )

    args = parser.parse_args()

    result = generate_image(
        prompt=args.prompt,
        model=args.model,
        output_path=args.output,
        api_key=args.api_key,
        input_image=args.input
    )

    # generate_image only warns when the response has no image; turn that
    # into a failing exit status for command-line use.
    if not _response_has_image(result):
        sys.exit(1)


if __name__ == "__main__":
    main()