commit 300fff94706353134a0e65ec2078cae9d65e5207 Author: dfty Date: Thu Jan 29 22:15:00 2026 +0800 Initial commit for scientific-schematics diff --git a/QUICK_REFERENCE.md b/QUICK_REFERENCE.md new file mode 100644 index 0000000..1f789ac --- /dev/null +++ b/QUICK_REFERENCE.md @@ -0,0 +1,208 @@ +# Scientific Schematics - Quick Reference + +**How it works:** Describe your diagram → Nano Banana Pro generates it automatically + +## Setup (One-Time) + +```bash +# Get API key from https://openrouter.ai/keys +export OPENROUTER_API_KEY='sk-or-v1-your_key_here' + +# Add to shell profile for persistence +echo 'export OPENROUTER_API_KEY="sk-or-v1-your_key"' >> ~/.bashrc # or ~/.zshrc +``` + +## Basic Usage + +```bash +# Describe your diagram, Nano Banana Pro creates it +python scripts/generate_schematic.py "your diagram description" -o output.png + +# That's it! Automatic: +# - Iterative refinement (up to 2 rounds) +# - Quality review and improvement +# - Publication-ready output +``` + +## Common Examples + +### CONSORT Flowchart +```bash +python scripts/generate_schematic.py \ + "CONSORT flow: screened n=500, excluded n=150, randomized n=350" \ + -o consort.png +``` + +### Neural Network +```bash +python scripts/generate_schematic.py \ + "Transformer architecture with encoder and decoder stacks" \ + -o transformer.png +``` + +### Biological Pathway +```bash +python scripts/generate_schematic.py \ + "MAPK pathway: EGFR → RAS → RAF → MEK → ERK" \ + -o mapk.png +``` + +### Circuit Diagram +```bash +python scripts/generate_schematic.py \ + "Op-amp circuit with 1kΩ resistor and 10µF capacitor" \ + -o circuit.png +``` + +## Command Options + +| Option | Description | Example | +|--------|-------------|---------| +| `-o, --output` | Output file path | `-o figures/diagram.png` | +| `--iterations N` | Number of refinements (1-2) | `--iterations 2` | +| `-v, --verbose` | Show detailed output | `-v` | +| `--api-key KEY` | Provide API key | `--api-key sk-or-v1-...` | + +## Prompt Tips + +### ✓ 
Good Prompts (Specific) +- "CONSORT flowchart with screening (n=500), exclusion (n=150), randomization (n=350)" +- "Transformer architecture: encoder on left with 6 layers, decoder on right, cross-attention connections" +- "MAPK signaling: receptor → RAS → RAF → MEK → ERK → nucleus, label each phosphorylation" + +### ✗ Avoid (Too Vague) +- "Make a flowchart" +- "Neural network" +- "Pathway diagram" + +## Output Files + +For input `diagram.png`, you get: +- `diagram_v1.png` - First iteration +- `diagram_v2.png` - Second iteration +- `diagram_v3.png` - Final iteration +- `diagram.png` - Copy of final +- `diagram_review_log.json` - Quality scores and critiques + +## Review Log + +```json +{ + "iterations": [ + { + "iteration": 1, + "score": 7.0, + "critique": "Good start. Font too small..." + }, + { + "iteration": 2, + "score": 8.5, + "critique": "Much improved. Minor spacing issues..." + }, + { + "iteration": 3, + "score": 9.5, + "critique": "Excellent. Publication ready." + } + ], + "final_score": 9.5 +} +``` + +## Python API + +```python +from scripts.generate_schematic_ai import ScientificSchematicGenerator + +# Initialize +gen = ScientificSchematicGenerator(api_key="your_key") + +# Generate +results = gen.generate_iterative( + user_prompt="diagram description", + output_path="output.png", + iterations=2 +) + +# Check quality +print(f"Score: {results['final_score']}/10") +``` + +## Troubleshooting + +### API Key Not Found +```bash +# Check if set +echo $OPENROUTER_API_KEY + +# Set it +export OPENROUTER_API_KEY='your_key' +``` + +### Import Error +```bash +# Install requests +pip install requests +``` + +### Low Quality Score +- Make prompt more specific +- Include layout details (left-to-right, top-to-bottom) +- Specify label requirements +- Increase iterations: `--iterations 2` + +## Testing + +```bash +# Verify installation +python test_ai_generation.py + +# Should show: "6/6 tests passed" +``` + +## Cost + +Typical cost per diagram (max 2 iterations): +- Simple 
(1 iteration): $0.05-0.15 +- Complex (2 iterations): $0.10-0.30 + +## How Nano Banana Pro Works + +**Simply describe your diagram in natural language:** +- ✓ No coding required +- ✓ No templates needed +- ✓ No manual drawing +- ✓ Automatic quality review +- ✓ Publication-ready output +- ✓ Works for any diagram type +- ✓ No figure numbers included (added separately in document/LaTeX) + +**Just describe what you want, and it's generated automatically.** + +## Getting Help + +```bash +# Show help +python scripts/generate_schematic.py --help + +# Verbose mode for debugging +python scripts/generate_schematic.py "diagram" -o out.png -v +``` + +## Quick Start Checklist + +- [ ] Set `OPENROUTER_API_KEY` environment variable +- [ ] Run `python test_ai_generation.py` (should pass 6/6) +- [ ] Try: `python scripts/generate_schematic.py "test diagram" -o test.png` +- [ ] Review output files (test_v1.png, test_v2.png if a second iteration ran, test.png, test_review_log.json) +- [ ] Read SKILL.md for detailed documentation +- [ ] Check README.md for examples + +## Resources + +- Full documentation: `SKILL.md` +- Detailed guide: `README.md` +- Implementation details: `IMPLEMENTATION_SUMMARY.md` +- Example script: `example_usage.sh` +- Get API key: https://openrouter.ai/keys + diff --git a/README.md b/README.md new file mode 100644 index 0000000..561c62f --- /dev/null +++ b/README.md @@ -0,0 +1,328 @@ +# Scientific Schematics - Nano Banana Pro + +**Generate any scientific diagram by describing it in natural language.** + +Nano Banana Pro creates publication-quality diagrams automatically - no coding, no templates, no manual drawing required. 
+ +## Quick Start + +### Generate Any Diagram + +```bash +# Set your OpenRouter API key +export OPENROUTER_API_KEY='your_api_key_here' + +# Generate any scientific diagram +python scripts/generate_schematic.py "CONSORT participant flow diagram" -o figures/consort.png + +# Neural network architecture +python scripts/generate_schematic.py "Transformer encoder-decoder architecture" -o figures/transformer.png + +# Biological pathway +python scripts/generate_schematic.py "MAPK signaling pathway" -o figures/pathway.png +``` + +### What You Get + +- **Up to two iterations** (v1, v2) with progressive refinement +- **Automatic quality review** after each iteration +- **Detailed review log** with scores and critiques (JSON format) +- **Publication-ready images** following scientific standards + +## Features + +### Iterative Refinement Process + +1. **Generation 1**: Create initial diagram from your description +2. **Review 1**: AI evaluates clarity, labels, accuracy, accessibility +3. **Generation 2**: Improve based on critique +4. **Review 2**: Second evaluation with specific feedback +5. **Final output**: The last generated version is copied to the requested output path + +### Automatic Quality Standards + +All diagrams automatically follow: +- Clean white/light background +- High contrast for readability +- Clear labels (minimum 10pt font) +- Professional typography +- Colorblind-friendly colors +- Proper spacing between elements +- Scale bars, legends, axes where appropriate +- **No figure numbers** - diagrams do not include "Figure 1:" or similar labels (added by document/LaTeX) + +## Installation + +### For AI Generation + +```bash +# Get OpenRouter API key +# Visit: https://openrouter.ai/keys + +# Set environment variable +export OPENROUTER_API_KEY='sk-or-v1-...' + +# Or add to .env file +echo "OPENROUTER_API_KEY=sk-or-v1-..." 
>> .env + +# Install Python dependencies (if not already installed) +pip install requests +``` + +## Usage Examples + +### Example 1: CONSORT Flowchart + +```bash +python scripts/generate_schematic.py \ + "CONSORT participant flow diagram for RCT. \ + Assessed for eligibility (n=500). \ + Excluded (n=150): age<18 (n=80), declined (n=50), other (n=20). \ + Randomized (n=350) into Treatment (n=175) and Control (n=175). \ + Lost to follow-up: 15 and 10 respectively. \ + Final analysis: 160 and 165." \ + -o figures/consort.png +``` + +**Output:** +- `figures/consort_v1.png` - Initial generation +- `figures/consort_v2.png` - After first review +- `figures/consort_v3.png` - Final version +- `figures/consort.png` - Copy of final version +- `figures/consort_review_log.json` - Detailed review log + +### Example 2: Neural Network Architecture + +```bash +python scripts/generate_schematic.py \ + "Transformer architecture with encoder on left (input embedding, \ + positional encoding, multi-head attention, feed-forward) and \ + decoder on right (masked attention, cross-attention, feed-forward). \ + Show cross-attention connection from encoder to decoder." \ + -o figures/transformer.png \ + --iterations 2 +``` + +### Example 3: Biological Pathway + +```bash +python scripts/generate_schematic.py \ + "MAPK signaling pathway: EGFR receptor → RAS → RAF → MEK → ERK → nucleus. \ + Label each step with phosphorylation. Use different colors for each kinase." \ + -o figures/mapk.png +``` + +### Example 4: System Architecture + +```bash +python scripts/generate_schematic.py \ + "IoT system block diagram: sensors (bottom) → microcontroller → \ + WiFi module and display (middle) → cloud server → mobile app (top). \ + Label all connections with protocols." 
\ + -o figures/iot_system.png +``` + +## Command-Line Options + +```bash +python scripts/generate_schematic.py [OPTIONS] "description" -o output.png + +Options: + --iterations N Number of AI refinement iterations (default: 2, max: 2) + --api-key KEY OpenRouter API key (or use env var) + -v, --verbose Verbose output + -h, --help Show help message +``` + +## Python API + +```python +from scripts.generate_schematic_ai import ScientificSchematicGenerator + +# Initialize +generator = ScientificSchematicGenerator( + api_key="your_key", + verbose=True +) + +# Generate with iterative refinement +results = generator.generate_iterative( + user_prompt="CONSORT flowchart", + output_path="figures/consort.png", + iterations=2 +) + +# Access results +print(f"Final score: {results['final_score']}/10") +print(f"Final image: {results['final_image']}") + +# Review iterations +for iteration in results['iterations']: + print(f"Iteration {iteration['iteration']}: {iteration['score']}/10") + print(f"Critique: {iteration['critique']}") +``` + +## Prompt Engineering Tips + +### Be Specific About Layout +✓ "Flowchart with vertical flow, top to bottom" +✓ "Architecture diagram with encoder on left, decoder on right" +✗ "Make a diagram" (too vague) + +### Include Quantitative Details +✓ "Neural network: input (784), hidden (128), output (10)" +✓ "Flowchart: n=500 screened, n=150 excluded, n=350 randomized" +✗ "Some numbers" (not specific) + +### Specify Visual Style +✓ "Minimalist block diagram with clean lines" +✓ "Detailed biological pathway with protein structures" +✓ "Technical schematic with engineering notation" + +### Request Specific Labels +✓ "Label all arrows with activation/inhibition" +✓ "Include layer dimensions in each box" +✓ "Show time progression with timestamps" + +### Mention Color Requirements +✓ "Use colorblind-friendly colors" +✓ "Grayscale-compatible design" +✓ "Color-code by function: blue=input, green=processing, red=output" + +## Review Log Format + +Each generation 
produces a JSON review log: + +```json +{ + "user_prompt": "CONSORT participant flow diagram...", + "iterations": [ + { + "iteration": 1, + "image_path": "figures/consort_v1.png", + "prompt": "Full generation prompt...", + "critique": "Score: 7/10. Issues: font too small...", + "score": 7.0, + "success": true + }, + { + "iteration": 2, + "image_path": "figures/consort_v2.png", + "score": 8.5, + "critique": "Much improved. Remaining issues..." + }, + { + "iteration": 3, + "image_path": "figures/consort_v3.png", + "score": 9.5, + "critique": "Excellent. Publication ready." + } + ], + "final_image": "figures/consort_v3.png", + "final_score": 9.5, + "success": true +} +``` + +## Why Use Nano Banana Pro + +**Simply describe what you want - Nano Banana Pro creates it:** + +- ✓ **Fast**: Results in minutes +- ✓ **Easy**: Natural language descriptions (no coding) +- ✓ **Quality**: Automatic review and refinement +- ✓ **Universal**: Works for all diagram types +- ✓ **Publication-ready**: High-quality output immediately + +**Just describe your diagram, and it's generated automatically.** + +## Troubleshooting + +### API Key Issues + +```bash +# Check if key is set +echo $OPENROUTER_API_KEY + +# Set temporarily +export OPENROUTER_API_KEY='your_key' + +# Set permanently (add to ~/.bashrc or ~/.zshrc) +echo 'export OPENROUTER_API_KEY="your_key"' >> ~/.bashrc +``` + +### Import Errors + +```bash +# Install requests library +pip install requests + +# Or use the package manager +pip install -r requirements.txt +``` + +### Generation Fails + +```bash +# Use verbose mode to see detailed errors +python scripts/generate_schematic.py "diagram" -o out.png -v + +# Check API status +curl https://openrouter.ai/api/v1/models +``` + +### Low Quality Scores + +If iterations consistently score below 7/10: +1. Make your prompt more specific +2. Include more details about layout and labels +3. Specify visual requirements explicitly +4. 
Increase iterations: `--iterations 2` + +## Testing + +Run verification tests: + +```bash +python test_ai_generation.py +``` + +This tests: +- File structure +- Module imports +- Class initialization +- Error handling +- Prompt engineering +- Wrapper script + +## Cost Considerations + +OpenRouter pricing for models used: +- **Nano Banana Pro**: ~$2/M input tokens, ~$12/M output tokens + +Typical costs per diagram: +- Simple diagram (1 iteration): ~$0.05-0.15 +- Complex diagram (2 iterations): ~$0.10-0.30 + +## Examples Gallery + +See the full SKILL.md for extensive examples including: +- CONSORT flowcharts +- Neural network architectures (Transformers, CNNs, RNNs) +- Biological pathways +- Circuit diagrams +- System architectures +- Block diagrams + +## Support + +For issues or questions: +1. Check SKILL.md for detailed documentation +2. Run test_ai_generation.py to verify setup +3. Use verbose mode (-v) to see detailed errors +4. Review the review_log.json for quality feedback + +## License + +Part of the scientific-writer package. See main repository for license information. + diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..cbb9982 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,616 @@ +--- +name: scientific-schematics +description: "Create publication-quality scientific diagrams using Nano Banana Pro AI with smart iterative refinement. Uses Gemini 3 Pro for quality review. Only regenerates if quality is below threshold for your document type. Specialized in neural network architectures, system diagrams, flowcharts, biological pathways, and complex scientific visualizations." +allowed-tools: [Read, Write, Edit, Bash] +--- + +# Scientific Schematics and Diagrams + +## Overview + +Scientific schematics and diagrams transform complex concepts into clear visual representations for publication. 
**This skill uses Nano Banana Pro AI for diagram generation with Gemini 3 Pro quality review.** + +**How it works:** +- Describe your diagram in natural language +- Nano Banana Pro generates publication-quality images automatically +- **Gemini 3 Pro reviews quality** against document-type thresholds +- **Smart iteration**: Only regenerates if quality is below threshold +- Publication-ready output in minutes +- No coding, templates, or manual drawing required + +**Quality Thresholds by Document Type:** +| Document Type | Threshold | Description | +|---------------|-----------|-------------| +| journal | 8.5/10 | Nature, Science, peer-reviewed journals | +| conference | 8.0/10 | Conference papers | +| thesis | 8.0/10 | Dissertations, theses | +| grant | 8.0/10 | Grant proposals | +| preprint | 7.5/10 | arXiv, bioRxiv, etc. | +| report | 7.5/10 | Technical reports | +| poster | 7.0/10 | Academic posters | +| presentation | 6.5/10 | Slides, talks | +| default | 7.5/10 | General purpose | + +**Simply describe what you want, and Nano Banana Pro creates it.** All diagrams are stored in the figures/ subfolder and referenced in papers/posters. + +## Quick Start: Generate Any Diagram + +Create any scientific diagram by simply describing it. 
Nano Banana Pro handles everything automatically with **smart iteration**: + +```bash +# Generate for journal paper (highest quality threshold: 8.5/10) +python scripts/generate_schematic.py "CONSORT participant flow diagram with 500 screened, 150 excluded, 350 randomized" -o figures/consort.png --doc-type journal + +# Generate for presentation (lower threshold: 6.5/10 - faster) +python scripts/generate_schematic.py "Transformer encoder-decoder architecture showing multi-head attention" -o figures/transformer.png --doc-type presentation + +# Generate for poster (moderate threshold: 7.0/10) +python scripts/generate_schematic.py "MAPK signaling pathway from EGFR to gene transcription" -o figures/mapk_pathway.png --doc-type poster + +# Custom max iterations (max 2) +python scripts/generate_schematic.py "Complex circuit diagram with op-amp, resistors, and capacitors" -o figures/circuit.png --iterations 2 --doc-type journal +``` + +**What happens behind the scenes:** +1. **Generation 1**: Nano Banana Pro creates initial image following scientific diagram best practices +2. **Review 1**: **Gemini 3 Pro** evaluates quality against document-type threshold +3. **Decision**: If quality >= threshold → **DONE** (no more iterations needed!) +4. **If below threshold**: Improved prompt based on critique, regenerate +5. **Repeat**: Until quality meets threshold OR max iterations reached + +**Smart Iteration Benefits:** +- ✅ Saves API calls if first generation is good enough +- ✅ Higher quality standards for journal papers +- ✅ Faster turnaround for presentations/posters +- ✅ Appropriate quality for each use case + +**Output**: Versioned images plus a detailed review log with quality scores, critiques, and early-stop information. 
+ +### Configuration + +Set your OpenRouter API key: +```bash +export OPENROUTER_API_KEY='your_api_key_here' +``` + +Get an API key at: https://openrouter.ai/keys + +### AI Generation Best Practices + +**Effective Prompts for Scientific Diagrams:** + +✓ **Good prompts** (specific, detailed): +- "CONSORT flowchart showing participant flow from screening (n=500) through randomization to final analysis" +- "Transformer neural network architecture with encoder stack on left, decoder stack on right, showing multi-head attention and cross-attention connections" +- "Biological signaling cascade: EGFR receptor → RAS → RAF → MEK → ERK → nucleus, with phosphorylation steps labeled" +- "Block diagram of IoT system: sensors → microcontroller → WiFi module → cloud server → mobile app" + +✗ **Avoid vague prompts**: +- "Make a flowchart" (too generic) +- "Neural network" (which type? what components?) +- "Pathway diagram" (which pathway? what molecules?) + +**Key elements to include:** +- **Type**: Flowchart, architecture diagram, pathway, circuit, etc. +- **Components**: Specific elements to include +- **Flow/Direction**: How elements connect (left-to-right, top-to-bottom) +- **Labels**: Key annotations or text to include +- **Style**: Any specific visual requirements + +**Scientific Quality Guidelines** (automatically applied): +- Clean white/light background +- High contrast for readability +- Clear, readable labels (minimum 10pt) +- Professional typography (sans-serif fonts) +- Colorblind-friendly colors (Okabe-Ito palette) +- Proper spacing to prevent crowding +- Scale bars, legends, axes where appropriate +- **No figure numbers** - figures should not include "Figure 1:", "Fig. 1", or similar labels (these are added by the document/LaTeX) + +## When to Use This Skill + +This skill should be used when: +- Creating neural network architecture diagrams (Transformers, CNNs, RNNs, etc.) 
+- Illustrating system architectures and data flow diagrams +- Drawing methodology flowcharts for study design (CONSORT, PRISMA) +- Visualizing algorithm workflows and processing pipelines +- Creating circuit diagrams and electrical schematics +- Depicting biological pathways and molecular interactions +- Generating network topologies and hierarchical structures +- Illustrating conceptual frameworks and theoretical models +- Designing block diagrams for technical papers + +## How to Use This Skill + +**Simply describe your diagram in natural language.** Nano Banana Pro generates it automatically: + +```bash +python scripts/generate_schematic.py "your diagram description" -o output.png +``` + +**That's it!** The AI handles: +- ✓ Layout and composition +- ✓ Labels and annotations +- ✓ Colors and styling +- ✓ Quality review and refinement +- ✓ Publication-ready output + +**Works for all diagram types:** +- Flowcharts (CONSORT, PRISMA, etc.) +- Neural network architectures +- Biological pathways +- Circuit diagrams +- System architectures +- Block diagrams +- Any scientific visualization + +**No coding, no templates, no manual drawing required.** + +--- + +# AI Generation Mode (Nano Banana Pro + Gemini 3 Pro Review) + +## Smart Iterative Refinement Workflow + +The AI generation system uses **smart iteration** - it only regenerates if quality is below the threshold for your document type: + +### How Smart Iteration Works + +``` +┌─────────────────────────────────────────────────────┐ +│ 1. Generate image with Nano Banana Pro │ +│ ↓ │ +│ 2. Review quality with Gemini 3 Pro │ +│ ↓ │ +│ 3. Score >= threshold? │ +│ YES → DONE! (early stop) │ +│ NO → Improve prompt, go to step 1 │ +│ ↓ │ +│ 4. 
Repeat until quality met OR max iterations │ +└─────────────────────────────────────────────────────┘ +``` + +### Iteration 1: Initial Generation +**Prompt Construction:** +``` +Scientific diagram guidelines + User request +``` + +**Output:** `diagram_v1.png` + +### Quality Review by Gemini 3 Pro + +Gemini 3 Pro evaluates the diagram on: +1. **Scientific Accuracy** (0-2 points) - Correct concepts, notation, relationships +2. **Clarity and Readability** (0-2 points) - Easy to understand, clear hierarchy +3. **Label Quality** (0-2 points) - Complete, readable, consistent labels +4. **Layout and Composition** (0-2 points) - Logical flow, balanced, no overlaps +5. **Professional Appearance** (0-2 points) - Publication-ready quality + +**Example Review Output:** +``` +SCORE: 8.0 + +STRENGTHS: +- Clear flow from top to bottom +- All phases properly labeled +- Professional typography + +ISSUES: +- Participant counts slightly small +- Minor overlap on exclusion box + +VERDICT: ACCEPTABLE (for poster, threshold 7.0) +``` + +### Decision Point: Continue or Stop? + +| If Score... | Action | +|-------------|--------| +| >= threshold | **STOP** - Quality is good enough for this document type | +| < threshold | Continue to next iteration with improved prompt | + +**Example:** +- For a **poster** (threshold 7.0): Score of 7.5 → **DONE after 1 iteration!** +- For a **journal** (threshold 8.5): Score of 7.5 → Continue improving + +### Subsequent Iterations (Only If Needed) + +If quality is below threshold, the system: +1. Extracts specific issues from Gemini 3 Pro's review +2. Enhances the prompt with improvement instructions +3. Regenerates with Nano Banana Pro +4. Reviews again with Gemini 3 Pro +5. 
Repeats until threshold met or max iterations reached + +### Review Log +All iterations are saved with a JSON review log that includes early-stop information: +```json +{ + "user_prompt": "CONSORT participant flow diagram...", + "doc_type": "poster", + "quality_threshold": 7.0, + "iterations": [ + { + "iteration": 1, + "image_path": "figures/consort_v1.png", + "score": 7.5, + "needs_improvement": false, + "critique": "SCORE: 7.5\nSTRENGTHS:..." + } + ], + "final_score": 7.5, + "early_stop": true, + "early_stop_reason": "Quality score 7.5 meets threshold 7.0 for poster" +} +``` + +**Note:** With smart iteration, you may see only 1 iteration instead of the full 2 if quality is achieved early! + +## Advanced AI Generation Usage + +### Python API + +```python +from scripts.generate_schematic_ai import ScientificSchematicGenerator + +# Initialize generator +generator = ScientificSchematicGenerator( + api_key="your_openrouter_key", + verbose=True +) + +# Generate with iterative refinement (max 2 iterations) +results = generator.generate_iterative( + user_prompt="Transformer architecture diagram", + output_path="figures/transformer.png", + iterations=2 +) + +# Access results +print(f"Final score: {results['final_score']}/10") +print(f"Final image: {results['final_image']}") + +# Review individual iterations +for iteration in results['iterations']: + print(f"Iteration {iteration['iteration']}: {iteration['score']}/10") + print(f"Critique: {iteration['critique']}") +``` + +### Command-Line Options + +```bash +# Basic usage (default threshold 7.5/10) +python scripts/generate_schematic.py "diagram description" -o output.png + +# Specify document type for appropriate quality threshold +python scripts/generate_schematic.py "diagram" -o out.png --doc-type journal # 8.5/10 +python scripts/generate_schematic.py "diagram" -o out.png --doc-type conference # 8.0/10 +python scripts/generate_schematic.py "diagram" -o out.png --doc-type poster # 7.0/10 +python 
scripts/generate_schematic.py "diagram" -o out.png --doc-type presentation # 6.5/10 + +# Custom max iterations (1-2) +python scripts/generate_schematic.py "complex diagram" -o diagram.png --iterations 2 + +# Verbose output (see all API calls and reviews) +python scripts/generate_schematic.py "flowchart" -o flow.png -v + +# Provide API key via flag +python scripts/generate_schematic.py "diagram" -o out.png --api-key "sk-or-v1-..." + +# Combine options +python scripts/generate_schematic.py "neural network" -o nn.png --doc-type journal --iterations 2 -v +``` + +### Prompt Engineering Tips + +**1. Be Specific About Layout:** +``` +✓ "Flowchart with vertical flow, top to bottom" +✓ "Architecture diagram with encoder on left, decoder on right" +✓ "Circular pathway diagram with clockwise flow" +``` + +**2. Include Quantitative Details:** +``` +✓ "Neural network with input layer (784 nodes), hidden layer (128 nodes), output (10 nodes)" +✓ "Flowchart showing n=500 screened, n=150 excluded, n=350 randomized" +✓ "Circuit with 1kΩ resistor, 10µF capacitor, 5V source" +``` + +**3. Specify Visual Style:** +``` +✓ "Minimalist block diagram with clean lines" +✓ "Detailed biological pathway with protein structures" +✓ "Technical schematic with engineering notation" +``` + +**4. Request Specific Labels:** +``` +✓ "Label all arrows with activation/inhibition" +✓ "Include layer dimensions in each box" +✓ "Show time progression with timestamps" +``` + +**5. Mention Color Requirements:** +``` +✓ "Use colorblind-friendly colors" +✓ "Grayscale-compatible design" +✓ "Color-code by function: blue for input, green for processing, red for output" +``` + +## AI Generation Examples + +### Example 1: CONSORT Flowchart +```bash +python scripts/generate_schematic.py \ + "CONSORT participant flow diagram for randomized controlled trial. \ + Start with 'Assessed for eligibility (n=500)' at top. \ + Show 'Excluded (n=150)' with reasons: age<18 (n=80), declined (n=50), other (n=20). 
\ + Then 'Randomized (n=350)' splits into two arms: \ + 'Treatment group (n=175)' and 'Control group (n=175)'. \ + Each arm shows 'Lost to follow-up' (n=15 and n=10). \ + End with 'Analyzed' (n=160 and n=165). \ + Use blue boxes for process steps, orange for exclusion, green for final analysis." \ + -o figures/consort.png +``` + +### Example 2: Neural Network Architecture +```bash +python scripts/generate_schematic.py \ + "Transformer encoder-decoder architecture diagram. \ + Left side: Encoder stack with input embedding, positional encoding, \ + multi-head self-attention, add & norm, feed-forward, add & norm. \ + Right side: Decoder stack with output embedding, positional encoding, \ + masked self-attention, add & norm, cross-attention (receiving from encoder), \ + add & norm, feed-forward, add & norm, linear & softmax. \ + Show cross-attention connection from encoder to decoder with dashed line. \ + Use light blue for encoder, light red for decoder. \ + Label all components clearly." \ + -o figures/transformer.png --iterations 2 +``` + +### Example 3: Biological Pathway +```bash +python scripts/generate_schematic.py \ + "MAPK signaling pathway diagram. \ + Start with EGFR receptor at cell membrane (top). \ + Arrow down to RAS (with GTP label). \ + Arrow to RAF kinase. \ + Arrow to MEK kinase. \ + Arrow to ERK kinase. \ + Final arrow to nucleus showing gene transcription. \ + Label each arrow with 'phosphorylation' or 'activation'. \ + Use rounded rectangles for proteins, different colors for each. \ + Include membrane boundary line at top." \ + -o figures/mapk_pathway.png +``` + +### Example 4: System Architecture +```bash +python scripts/generate_schematic.py \ + "IoT system architecture block diagram. \ + Bottom layer: Sensors (temperature, humidity, motion) in green boxes. \ + Middle layer: Microcontroller (ESP32) in blue box. \ + Connections to WiFi module (orange box) and Display (purple box). 
\ + Top layer: Cloud server (gray box) connected to mobile app (light blue box). \ + Show data flow arrows between all components. \ + Label connections with protocols: I2C, UART, WiFi, HTTPS." \ + -o figures/iot_architecture.png +``` + +--- + +## Command-Line Usage + +The main entry point for generating scientific schematics: + +```bash +# Basic usage +python scripts/generate_schematic.py "diagram description" -o output.png + +# Custom iterations (max 2) +python scripts/generate_schematic.py "complex diagram" -o diagram.png --iterations 2 + +# Verbose mode +python scripts/generate_schematic.py "diagram" -o out.png -v +``` + +**Note:** The Nano Banana Pro AI generation system includes automatic quality review in its iterative refinement process. Each iteration is evaluated for scientific accuracy, clarity, and accessibility. + +## Best Practices Summary + +### Design Principles + +1. **Clarity over complexity** - Simplify, remove unnecessary elements +2. **Consistent styling** - Use templates and style files +3. **Colorblind accessibility** - Use Okabe-Ito palette, redundant encoding +4. **Appropriate typography** - Sans-serif fonts, minimum 7-8 pt at final size +5. **Vector format** - Prefer PDF/SVG for publication; otherwise use raster output at 300+ DPI + +### Technical Requirements + +1. **Resolution** - Vector preferred, or 300+ DPI for raster +2. **File format** - PDF for LaTeX, SVG for web, PNG as fallback +3. **Color space** - RGB for digital, CMYK for print (convert if needed) +4. **Line weights** - Minimum 0.5 pt, typical 1-2 pt +5. **Text size** - 7-8 pt minimum at final size + +### Integration Guidelines + +1. **Include in LaTeX** - Use `\includegraphics{}` for generated images +2. **Caption thoroughly** - Describe all elements and abbreviations +3. **Reference in text** - Explain diagram in narrative flow +4. **Maintain consistency** - Same style across all figures in paper +5. 
**Version control** - Keep prompts and generated images in repository + +## Troubleshooting Common Issues + +### AI Generation Issues + +**Problem**: Overlapping text or elements +- **Solution**: AI generation automatically handles spacing +- **Solution**: Increase iterations: `--iterations 2` for better refinement + +**Problem**: Elements not connecting properly +- **Solution**: Make your prompt more specific about connections and layout +- **Solution**: Increase iterations for better refinement + +### Image Quality Issues + +**Problem**: Export quality poor +- **Solution**: AI generation produces high-quality images automatically +- **Solution**: Increase iterations for better results: `--iterations 2` + +**Problem**: Elements overlap after generation +- **Solution**: AI generation automatically handles spacing +- **Solution**: Increase iterations: `--iterations 2` for better refinement +- **Solution**: Make your prompt more specific about layout and spacing requirements + +### Quality Check Issues + +**Problem**: False positive overlap detection +- **Solution**: Adjust threshold: `detect_overlaps(image_path, threshold=0.98)` +- **Solution**: Manually review flagged regions in visual report + +**Problem**: Generated image quality is low +- **Solution**: AI generation produces high-quality images by default +- **Solution**: Increase iterations for better results: `--iterations 2` + +**Problem**: Colorblind simulation shows poor contrast +- **Solution**: Switch to Okabe-Ito palette explicitly in code +- **Solution**: Add redundant encoding (shapes, patterns, line styles) +- **Solution**: Increase color saturation and lightness differences + +**Problem**: High-severity overlaps detected +- **Solution**: Review overlap_report.json for exact positions +- **Solution**: Increase spacing in those specific regions +- **Solution**: Re-run with adjusted parameters and verify again + +**Problem**: Visual report generation fails +- **Solution**: Check Pillow and matplotlib 
installations +- **Solution**: Ensure image file is readable: `Image.open(path).verify()` +- **Solution**: Check sufficient disk space for report generation + +### Accessibility Problems + +**Problem**: Colors indistinguishable in grayscale +- **Solution**: Run accessibility checker: `verify_accessibility(image_path)` +- **Solution**: Add patterns, shapes, or line styles for redundancy +- **Solution**: Increase contrast between adjacent elements + +**Problem**: Text too small when printed +- **Solution**: Run resolution validator: `validate_resolution(image_path)` +- **Solution**: Design at final size, use minimum 7-8 pt fonts +- **Solution**: Check physical dimensions in resolution report + +**Problem**: Accessibility checks consistently fail +- **Solution**: Review accessibility_report.json for specific failures +- **Solution**: Increase color contrast by at least 20% +- **Solution**: Test with actual grayscale conversion before finalizing + +## Resources and References + +### Detailed References + +Load these files for comprehensive information on specific topics: + +- **`references/diagram_types.md`** - Catalog of scientific diagram types with examples +- **`references/best_practices.md`** - Publication standards and accessibility guidelines + +### External Resources + +**Python Libraries** +- Schemdraw Documentation: https://schemdraw.readthedocs.io/ +- NetworkX Documentation: https://networkx.org/documentation/ +- Matplotlib Documentation: https://matplotlib.org/ + +**Publication Standards** +- Nature Figure Guidelines: https://www.nature.com/nature/for-authors/final-submission +- Science Figure Guidelines: https://www.science.org/content/page/instructions-preparing-initial-manuscript +- CONSORT Diagram: http://www.consort-statement.org/consort-statement/flow-diagram + +## Integration with Other Skills + +This skill works synergistically with: + +- **Scientific Writing** - Diagrams follow figure best practices +- **Scientific Visualization** - Shares color 
palettes and styling +- **LaTeX Posters** - Generate diagrams for poster presentations +- **Research Grants** - Methodology diagrams for proposals +- **Peer Review** - Evaluate diagram clarity and accessibility + +## Quick Reference Checklist + +Before submitting diagrams, verify: + +### Visual Quality +- [ ] High-quality image format (PNG from AI generation) +- [ ] No overlapping elements (AI handles automatically) +- [ ] Adequate spacing between all components (AI optimizes) +- [ ] Clean, professional alignment +- [ ] All arrows connect properly to intended targets + +### Accessibility +- [ ] Colorblind-safe palette (Okabe-Ito) used +- [ ] Works in grayscale (tested with accessibility checker) +- [ ] Sufficient contrast between elements (verified) +- [ ] Redundant encoding where appropriate (shapes + colors) +- [ ] Colorblind simulation passes all checks + +### Typography and Readability +- [ ] Text minimum 7-8 pt at final size +- [ ] All elements labeled clearly and completely +- [ ] Consistent font family and sizing +- [ ] No text overlaps or cutoffs +- [ ] Units included where applicable + +### Publication Standards +- [ ] Consistent styling with other figures in manuscript +- [ ] Comprehensive caption written with all abbreviations defined +- [ ] Referenced appropriately in manuscript text +- [ ] Meets journal-specific dimension requirements +- [ ] Exported in required format for journal (PDF/EPS/TIFF) + +### Quality Verification (Required) +- [ ] Ran `run_quality_checks()` and achieved PASS status +- [ ] Reviewed overlap detection report (zero high-severity overlaps) +- [ ] Passed accessibility verification (grayscale and colorblind) +- [ ] Resolution validated at target DPI (300+ for print) +- [ ] Visual quality report generated and reviewed +- [ ] All quality reports saved with figure files + +### Documentation and Version Control +- [ ] Source files (.tex, .py) saved for future revision +- [ ] Quality reports archived in `quality_reports/` directory +- [ 
#!/bin/bash
# Example usage of AI-powered scientific schematic generation.
#
# Runs three small demonstrations (CONSORT flowchart, neural network,
# signaling pathway) through scripts/generate_schematic.py and writes the
# results into ./figures.
#
# Prerequisites:
# 1. Set OPENROUTER_API_KEY environment variable
# 2. Ensure Python 3.10+ is installed
# 3. Install requests: pip install requests

# Abort on the first failing command and on use of an unset variable.
set -euo pipefail

echo "=========================================="
echo "Scientific Schematics - AI Generation"
echo "Example Usage Demonstrations"
echo "=========================================="
echo ""

# Check for API key. The ":-" default keeps the test safe under `set -u`
# when the variable is not defined at all.
if [ -z "${OPENROUTER_API_KEY:-}" ]; then
    echo "❌ Error: OPENROUTER_API_KEY environment variable not set" >&2
    echo "" >&2
    echo "Get an API key at: https://openrouter.ai/keys" >&2
    echo "Then set it with: export OPENROUTER_API_KEY='your_key'" >&2
    exit 1
fi

echo "✓ OPENROUTER_API_KEY is set"
echo ""

# Create output directory
mkdir -p figures
echo "✓ Created figures/ directory"
echo ""

# Example 1: Simple flowchart
echo "Example 1: CONSORT Flowchart"
echo "----------------------------"
python scripts/generate_schematic.py \
    "CONSORT participant flow diagram. Assessed for eligibility (n=500). Excluded (n=150) with reasons: age<18 (n=80), declined (n=50), other (n=20). Randomized (n=350) into Treatment (n=175) and Control (n=175). Lost to follow-up: 15 and 10. Final analysis: 160 and 165." \
    -o figures/consort_example.png \
    --iterations 2

echo ""
echo "✓ Generated: figures/consort_example.png"
# At most two iterations are requested above, so a third version is never
# produced; a second one only exists when the first was regenerated.
echo "  - May also create: consort_example_v1.png, consort_example_v2.png (one per iteration run)"
echo "  - Review log: consort_example_review_log.json"
echo ""

# Example 2: Neural network (shorter for demo)
echo "Example 2: Simple Neural Network"
echo "--------------------------------"
python scripts/generate_schematic.py \
    "Simple feedforward neural network diagram. Input layer with 4 nodes, hidden layer with 6 nodes, output layer with 2 nodes. Show all connections. Label layers clearly." \
    -o figures/neural_net_example.png \
    --iterations 2

echo ""
echo "✓ Generated: figures/neural_net_example.png"
echo ""

# Example 3: Biological pathway (minimal)
echo "Example 3: Signaling Pathway"
echo "---------------------------"
python scripts/generate_schematic.py \
    "Simple signaling pathway: Receptor → Kinase A → Kinase B → Transcription Factor → Gene. Show arrows with 'activation' labels. Use different colors for each component." \
    -o figures/pathway_example.png \
    --iterations 2

echo ""
echo "✓ Generated: figures/pathway_example.png"
echo ""

echo "=========================================="
echo "All examples completed successfully!"
echo "=========================================="
echo ""
echo "Generated files in figures/:"
# `|| echo` keeps a non-matching glob from aborting the script under `set -e`.
ls -lh figures/*example*.png 2>/dev/null || echo "  (Files will appear after running with valid API key)"
echo ""
echo "Review the review_log.json files to see:"
echo "  - Quality scores for each iteration"
echo "  - Detailed critiques and suggestions"
echo "  - Improvement progression"
echo ""
echo "Next steps:"
echo "  1. View the generated images"
echo "  2. Review the quality scores in *_review_log.json"
echo "  3. Try your own prompts!"
echo ""
File Format Requirements + +**Vector Formats (Preferred)** +- **PDF**: Universal acceptance, preserves quality, works with LaTeX + - Use for: Line drawings, flowcharts, block diagrams, circuit diagrams + - Advantages: Scalable, small file size, embeds fonts + - Standard for LaTeX workflows + +- **EPS (Encapsulated PostScript)**: Legacy format, still accepted + - Use for: Older publishing systems + - Compatible with most journals + - Can be converted from PDF + +- **SVG (Scalable Vector Graphics)**: Web-friendly, increasingly accepted + - Use for: Online publications, interactive figures + - Can be edited in vector graphics software + - Not all journals accept SVG + +**Raster Formats (When Necessary)** +- **TIFF**: Professional standard for raster graphics + - Use for: Microscopy images, photographs combined with diagrams + - Minimum 300 DPI at final print size + - Lossless compression (LZW) + +- **PNG**: Web-friendly, lossless compression + - Use for: Online supplementary materials, presentations + - Minimum 300 DPI for print + - Supports transparency + +**Never Use** +- **JPEG**: Lossy compression creates artifacts in diagrams +- **GIF**: Limited colors, inappropriate for scientific figures +- **BMP**: Uncompressed, unnecessarily large files + +### 2. Resolution Requirements + +**Vector Graphics** +- Infinite resolution (scalable) +- **Recommended**: Always use vector when possible + +**Raster Graphics (when vector not possible)** +- **Publication quality**: 300-600 DPI +- **Line art**: 600-1200 DPI +- **Web/screen**: 150 DPI acceptable +- **Never**: Below 300 DPI for print + +**Calculating DPI** +``` +DPI = pixels / (inches at final size) + +Example: +Image size: 2400 × 1800 pixels +Final print size: 8 × 6 inches +DPI = 2400 / 8 = 300 ✓ (acceptable) +``` + +### 3. 
Size and Dimensions + +**Journal-Specific Column Widths** +- **Nature**: Single column 89 mm (3.5 in), Double 183 mm (7.2 in) +- **Science**: Single column 55 mm (2.17 in), Double 120 mm (4.72 in) +- **Cell**: Single column 85 mm (3.35 in), Double 178 mm (7 in) +- **PLOS**: Single column 83 mm (3.27 in), Double 173 mm (6.83 in) +- **IEEE**: Single column 3.5 in, Double 7.16 in + +**Best Practices** +- Design at final print size (avoid scaling) +- Use journal templates when available +- Allow margins for cropping +- Test appearance at final size before submission + +### 4. Typography Standards + +**Font Selection** +- **Recommended**: Arial, Helvetica, Calibri (sans-serif) +- **Acceptable**: Times New Roman (serif) for mathematics-heavy +- **Avoid**: Decorative fonts, script fonts, system fonts that may not embed + +**Font Sizes (at final print size)** +- **Minimum**: 6-7 pt (journal dependent) +- **Axis labels**: 8-9 pt +- **Figure labels**: 10-12 pt +- **Panel labels (A, B, C)**: 10-14 pt, bold +- **Main text**: Should match manuscript body text + +**Text Clarity** +- Use sentence case: "Time (seconds)" not "TIME (SECONDS)" +- Include units in parentheses: "Temperature (°C)" +- Spell out abbreviations in figure caption +- Avoid rotated text when possible (exception: y-axis labels) +- **No figure numbers in diagram** - do not include "Figure 1:", "Fig. 1", etc. (these are added by LaTeX/document) + +### 5. Line Weights and Strokes + +**Recommended Line Widths** +- **Diagram outlines**: 0.5-1.0 pt +- **Connection lines/arrows**: 1.0-2.0 pt +- **Emphasis elements**: 2.0-3.0 pt +- **Minimum visible**: 0.25 pt at final size + +**Consistency** +- Use same line weight for similar elements +- Vary line weight to show hierarchy +- Avoid hairline rules (too thin to print reliably) + +## Accessibility and Colorblindness + +### 1. 
Colorblind-Safe Palettes + +**Okabe-Ito Palette (Recommended)** +Most distinguishable by all types of colorblindness: + +```latex +% RGB values +Orange: #E69F00 (230, 159, 0) +Sky Blue: #56B4E9 ( 86, 180, 233) +Green: #009E73 ( 0, 158, 115) +Yellow: #F0E442 (240, 228, 66) +Blue: #0072B2 ( 0, 114, 178) +Vermillion: #D55E00 (213, 94, 0) +Purple: #CC79A7 (204, 121, 167) +Black: #000000 ( 0, 0, 0) +``` + +**Alternative: ColorBrewer Palettes** +- **Qualitative**: Set2, Paired, Dark2 +- **Sequential**: Blues, Greens, Oranges (avoid Reds/Greens together) +- **Diverging**: RdBu (Red-Blue), PuOr (Purple-Orange) + +**Colors to Avoid Together** +- Red-Green combinations (8% of males cannot distinguish) +- Blue-Purple combinations +- Yellow-Light green combinations + +### 2. Redundant Encoding + +Don't rely on color alone. Use multiple visual channels: + +**Shape + Color** +``` +Circle + Blue = Condition A +Square + Orange = Condition B +Triangle + Green = Condition C +``` + +**Line Style + Color** +``` +Solid + Blue = Treatment 1 +Dashed + Orange = Treatment 2 +Dotted + Green = Control +``` + +**Pattern Fill + Color** +``` +Solid fill + Blue = Group A +Diagonal stripes + Orange = Group B +Cross-hatch + Green = Group C +``` + +### 3. Grayscale Compatibility + +**Test Requirement**: All diagrams must be interpretable in grayscale + +**Strategies** +- Use different shades (light, medium, dark) +- Add patterns or textures to filled areas +- Vary line styles (solid, dashed, dotted) +- Use labels directly on elements +- Include text annotations + +**Grayscale Test** +```bash +# Convert to grayscale to test +convert diagram.pdf -colorspace gray diagram_gray.pdf +``` + +### 4. 
Contrast Requirements + +**Minimum Contrast Ratios (WCAG Guidelines)** +- **Normal text**: 4.5:1 +- **Large text** (≥18pt): 3:1 +- **Graphical elements**: 3:1 + +**High Contrast Practices** +- Dark text on light background (or vice versa) +- Avoid low-contrast color pairs (yellow on white, light gray on white) +- Use black or dark gray for critical text +- White text on dark backgrounds needs larger font size + +### 5. Alternative Text and Descriptions + +**Figure Captions Must Include** +- Description of diagram type +- All abbreviations spelled out +- Explanation of symbols and colors +- Sample sizes (n) where relevant +- Statistical annotations explained +- Reference to detailed methods if applicable + +**Example Caption** +"Participant flow diagram following CONSORT guidelines. Rectangles represent study stages, with participant numbers (n) shown. Exclusion criteria are listed beside each screening stage. Final analysis included n=350 participants across two groups." + +## Design Principles + +### 1. Simplicity and Clarity + +**Occam's Razor for Diagrams** +- Remove every element that doesn't add information +- Simplify complex relationships +- Break complex diagrams into multiple panels +- Use consistent layouts across related figures + +**Visual Hierarchy** +- Most important elements: Largest, darkest, central +- Supporting elements: Smaller, lighter, peripheral +- Annotations: Minimal, clear labels only + +### 2. Consistency + +**Within a Figure** +- Same shape/color represents same concept +- Consistent arrow styles for same relationships +- Uniform spacing and alignment +- Matching font sizes for similar elements + +**Across Figures in a Paper** +- Reuse color schemes +- Maintain consistent node styles +- Use same notation system +- Apply same layout principles + +### 3. 
Professional Appearance + +**Alignment** +- Use grids for node placement +- Align nodes horizontally or vertically +- Evenly space elements +- Center labels within shapes + +**White Space** +- Don't overcrowd diagrams +- Leave breathing room around elements +- Use white space to group related items +- Margins around entire diagram + +**Polish** +- No jagged lines or misaligned elements +- Smooth curves and precise angles +- Clean connection points +- No overlapping text + +## Common Pitfalls and Solutions + +### Pitfall 1: Overcomplicated Diagrams + +**Problem**: Too much information in one diagram +**Solution**: +- Split into multiple panels (A, B, C) +- Create overview + detailed diagrams +- Move details to supplementary figures +- Use hierarchical presentation + +### Pitfall 2: Inconsistent Styling + +**Problem**: Different styles for same elements across figures +**Solution**: +- Create and use style templates +- Use the same color palette throughout +- Document your style choices + +### Pitfall 3: Poor Label Placement + +**Problem**: Labels overlap elements or are hard to read +**Solution**: +- Place labels outside shapes when possible +- Use leader lines for distant labels +- Rotate text only when necessary +- Ensure adequate contrast with background + +### Pitfall 4: Tiny Text + +**Problem**: Text too small to read at final print size +**Solution**: +- Design at final size from the start +- Test print at final size +- Minimum 7-8 pt font +- Simplify labels if space is limited + +### Pitfall 5: Ambiguous Arrows + +**Problem**: Unclear what arrows represent or where they point +**Solution**: +- Use different arrow styles for different meanings +- Add labels to arrows +- Include legend for arrow types +- Use anchor points for precise connections + +### Pitfall 6: Color Overuse + +**Problem**: Too many colors, confusing or inaccessible +**Solution**: +- Limit to 3-5 colors maximum +- Use color purposefully (categories, emphasis) +- Stick to colorblind-safe 
palette +- Provide redundant encoding + +## Quality Control Checklist + +### Before Submission + +**Technical Requirements** +- [ ] Correct file format (PDF/EPS preferred for diagrams) +- [ ] Sufficient resolution (vector or 300+ DPI) +- [ ] Appropriate size (matches journal column width) +- [ ] Fonts embedded in PDF +- [ ] No compression artifacts + +**Accessibility** +- [ ] Colorblind-safe palette used +- [ ] Works in grayscale (tested) +- [ ] Text minimum 7-8 pt at final size +- [ ] High contrast between elements +- [ ] Redundant encoding (not color alone) + +**Design Quality** +- [ ] Elements aligned properly +- [ ] Consistent spacing and layout +- [ ] No overlapping text or elements +- [ ] Clear visual hierarchy +- [ ] Professional appearance + +**Content** +- [ ] All elements labeled +- [ ] Abbreviations defined +- [ ] Units included where relevant +- [ ] Legend provided if needed +- [ ] Caption comprehensive + +**Consistency** +- [ ] Matches other figures in style +- [ ] Same notation as text +- [ ] Consistent with journal guidelines +- [ ] Cross-references work + +## Journal-Specific Guidelines + +### Nature + +**Figure Requirements** +- **Size**: 89 mm (single) or 183 mm (double column) +- **Format**: PDF, EPS, or high-res TIFF +- **Fonts**: Sans-serif preferred +- **File size**: <10 MB per file +- **Resolution**: 300 DPI minimum for raster + +**Style Notes** +- Panel labels: lowercase bold (a, b, c) +- Simple, clean design +- Minimal colors +- Clear captions + +### Science + +**Figure Requirements** +- **Size**: 55 mm (single) or 120 mm (double column) +- **Format**: PDF, EPS, TIFF, or JPEG (high quality) +- **Resolution**: 300 DPI for photos, 600 DPI for line art +- **File size**: <10 MB +- **Fonts**: 6-7 pt minimum + +**Style Notes** +- Panel labels: capital bold (A, B, C) +- High contrast +- Readable at small size + +### Cell + +**Figure Requirements** +- **Size**: 85 mm (single) or 178 mm (double column) +- **Format**: PDF preferred, TIFF, EPS 
acceptable +- **Resolution**: 300 DPI minimum +- **Fonts**: 8-10 pt for labels +- **Line weight**: 0.5 pt minimum + +**Style Notes** +- Clean, professional +- Color or grayscale +- Panel labels capital (A, B, C) + +### IEEE + +**Figure Requirements** +- **Size**: 3.5 in (single) or 7.16 in (double column) +- **Format**: PDF, EPS (vector preferred) +- **Resolution**: 600 DPI for line art, 300 DPI for halftone +- **Fonts**: 8-10 pt minimum +- **Color**: Grayscale in print, color in digital + +**Style Notes** +- Follow IEEE Graphics Manual +- Standard symbols for circuits +- Technical precision +- Clear axis labels + +## Software-Specific Export Settings + +### AI-Generated Images + +AI-generated diagrams are exported as PNG images and can be included in LaTeX documents using: + +```latex +\includegraphics[width=\textwidth]{diagram.png} +``` + +### Python (Matplotlib) Export + +```python +import matplotlib.pyplot as plt + +# Set publication quality +plt.rcParams['font.family'] = 'sans-serif' +plt.rcParams['font.sans-serif'] = ['Arial'] +plt.rcParams['font.size'] = 8 +plt.rcParams['pdf.fonttype'] = 42 # TrueType fonts in PDF + +# Save with proper DPI and cropping +fig.savefig('diagram.pdf', dpi=300, bbox_inches='tight', + pad_inches=0.1, transparent=False) +fig.savefig('diagram.png', dpi=300, bbox_inches='tight') +``` + +### Schemdraw Export + +```python +import schemdraw + +d = schemdraw.Drawing() +# ... build circuit ... 
+ +# Export +d.save('circuit.svg') # Vector +d.save('circuit.pdf') # Vector +d.save('circuit.png', dpi=300) # Raster +``` + +### Inkscape Command Line + +```bash +# PDF to high-res PNG (Inkscape 1.0+; 0.9x releases used --export-png=FILE) +inkscape diagram.pdf --export-type=png --export-filename=diagram.png --export-dpi=300 + +# SVG to PDF (Inkscape 1.0+; 0.9x releases used --export-pdf=FILE) +inkscape diagram.svg --export-type=pdf --export-filename=diagram.pdf +``` + +## Version Control Best Practices + +**Keep Source Files** +- Save original .tex, .py, or .svg files +- Use descriptive filenames with versions +- Document color palette and style choices +- Include README with regeneration instructions + +**Directory Structure** +``` +figures/ +├── source/ # Editable source files +│ ├── diagram1.tex +│ ├── circuit.py +│ └── pathway.svg +├── generated/ # Auto-generated outputs +│ ├── diagram1.pdf +│ ├── circuit.pdf +│ └── pathway.pdf +└── final/ # Final submission versions + ├── figure1.pdf + └── figure2.pdf +``` + +**Git Tracking** +- Track source files (.tex, .py) +- Consider .gitignore for generated PDFs (large files) +- Use releases/tags for submission versions +- Document generation process in README + +## Testing and Validation + +### Pre-Submission Tests + +**Visual Tests** +1. **Print test**: Print at final size, check readability +2. **Grayscale test**: Convert to grayscale, verify interpretability +3. **Zoom test**: View at 400% and 25% to check scalability +4. **Screen test**: View on different devices (phone, tablet, desktop) + +**Technical Tests** +1. **Font embedding**: Check PDF properties +2. **Resolution check**: Verify DPI meets requirements +3. **File size**: Ensure under journal limits +4. **Format compliance**: Verify accepted format + +**Accessibility Tests** +1. **Colorblind simulation**: Use tools like Color Oracle +2. **Contrast checker**: WCAG contrast ratio tools +3. 
**Screen reader**: Test alt text (for web figures) + +### Tools for Testing + +**Colorblind Simulation** +- Color Oracle (free, cross-platform) +- Coblis (Color Blindness Simulator) +- Photoshop/GIMP colorblind preview modes + +**PDF Inspection** +```bash +# Check PDF properties +pdfinfo diagram.pdf + +# Check fonts +pdffonts diagram.pdf + +# Check image resolution +identify -verbose diagram.pdf +``` + +**Contrast Checking** +- WebAIM Contrast Checker: https://webaim.org/resources/contrastchecker/ +- Colorable: https://colorable.jxnblk.com/ + +## Summary: Golden Rules + +1. **Vector first**: Always use vector formats when possible +2. **Design at final size**: Avoid scaling after creation +3. **Colorblind-safe palette**: Use Okabe-Ito or similar +4. **Test in grayscale**: Diagrams must work without color +5. **Minimum 7-8 pt text**: At final print size +6. **Consistent styling**: Across all figures in paper +7. **Keep it simple**: Remove unnecessary elements +8. **High contrast**: Ensure readability +9. **Align elements**: Professional appearance matters +10. **Comprehensive caption**: Explain everything + +## Further Resources + +- **Nature Figure Preparation**: https://www.nature.com/nature/for-authors/final-submission +- **Science Figure Guidelines**: https://www.science.org/content/page/instructions-preparing-initial-manuscript +- **WCAG Accessibility Standards**: https://www.w3.org/WAI/WCAG21/quickref/ +- **Color Universal Design (CUD)**: https://jfly.uni-koeln.de/color/ +- **ColorBrewer**: https://colorbrewer2.org/ + +Following these best practices ensures your diagrams meet publication standards and effectively communicate to all readers, regardless of colorblindness or viewing conditions. + diff --git a/scripts/generate_schematic.py b/scripts/generate_schematic.py new file mode 100644 index 0000000..e5146ce --- /dev/null +++ b/scripts/generate_schematic.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +""" +Scientific schematic generation using Nano Banana Pro. 
# Hard upper bound on refinement passes; this is also the value the backend
# script is assumed to use when no --iterations flag is forwarded.
_MAX_ITERATIONS = 2


def _build_parser():
    """Create the argument parser for the command-line interface."""
    parser = argparse.ArgumentParser(
        description="Generate scientific schematics using AI with smart iterative refinement",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
How it works:
  Simply describe your diagram in natural language
  Nano Banana Pro generates it automatically with:
  - Smart iteration (only regenerates if quality is below threshold)
  - Quality review by Gemini 3 Pro
  - Document-type aware quality thresholds
  - Publication-ready output

Document Types (quality thresholds):
  journal       8.5/10  - Nature, Science, peer-reviewed journals
  conference    8.0/10  - Conference papers
  thesis        8.0/10  - Dissertations, theses
  grant         8.0/10  - Grant proposals
  preprint      7.5/10  - arXiv, bioRxiv, etc.
  report        7.5/10  - Technical reports
  poster        7.0/10  - Academic posters
  presentation  6.5/10  - Slides, talks
  default       7.5/10  - General purpose

Examples:
  # Generate for journal paper (strict quality)
  python generate_schematic.py "CONSORT participant flow" -o flowchart.png --doc-type journal

  # Generate for poster (moderate quality)
  python generate_schematic.py "Transformer architecture" -o arch.png --doc-type poster

  # Generate for slides (faster, lower threshold)
  python generate_schematic.py "System diagram" -o system.png --doc-type presentation

  # Custom max iterations
  python generate_schematic.py "Complex pathway" -o pathway.png --iterations 2

  # Verbose output
  python generate_schematic.py "Circuit diagram" -o circuit.png -v

Environment Variables:
  OPENROUTER_API_KEY    Required for AI generation
        """,
    )

    parser.add_argument("prompt",
                        help="Description of the diagram to generate")
    parser.add_argument("-o", "--output", required=True,
                        help="Output file path")
    parser.add_argument("--doc-type", default="default",
                        choices=["journal", "conference", "poster", "presentation",
                                 "report", "grant", "thesis", "preprint", "default"],
                        help="Document type for quality threshold (default: default)")
    parser.add_argument("--iterations", type=int, default=2,
                        help="Maximum refinement iterations (default: 2, max: 2)")
    parser.add_argument("--api-key",
                        help="OpenRouter API key (or use OPENROUTER_API_KEY env var)")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Verbose output")
    return parser


def _build_command(args, ai_script):
    """Translate parsed CLI arguments into the argv list for the backend script.

    The API key is deliberately NOT part of the command line: process
    arguments are visible to other local users (e.g. via ``ps``), so the key
    is handed to the child through its environment instead.

    Args:
        args: Namespace produced by the parser from ``_build_parser``.
        ai_script: Path of ``generate_schematic_ai.py``.

    Returns:
        List of strings suitable for ``subprocess.run``.
    """
    cmd = [sys.executable, str(ai_script), args.prompt, "-o", args.output]

    if args.doc_type != "default":
        cmd.extend(["--doc-type", args.doc_type])

    # Clamp into [1, _MAX_ITERATIONS]; zero or negative requests used to be
    # forwarded unchanged even though only the upper bound was enforced.
    iterations = max(1, min(args.iterations, _MAX_ITERATIONS))
    if iterations != _MAX_ITERATIONS:
        cmd.extend(["--iterations", str(iterations)])

    if args.verbose:
        cmd.append("-v")

    return cmd


def main():
    """Command-line interface.

    Parses ``sys.argv``, resolves the OpenRouter API key (``--api-key`` flag
    first, then the ``OPENROUTER_API_KEY`` environment variable), and runs
    ``generate_schematic_ai.py`` from this script's directory as a child
    process.

    Always terminates via ``sys.exit``: with the child's return code on a
    completed run, or with 1 when the key is missing, the backend script is
    not found, or the child process cannot be started. Diagnostics are
    written to stderr.
    """
    args = _build_parser().parse_args()

    # Check for API key
    api_key = args.api_key or os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        print("Error: OPENROUTER_API_KEY environment variable not set", file=sys.stderr)
        print("\nFor AI generation, you need an OpenRouter API key.", file=sys.stderr)
        print("Get one at: https://openrouter.ai/keys", file=sys.stderr)
        print("\nSet it with:", file=sys.stderr)
        print("  export OPENROUTER_API_KEY='your_api_key'", file=sys.stderr)
        print("\nOr use --api-key flag", file=sys.stderr)
        sys.exit(1)

    # Find AI generation script (expected next to this file)
    ai_script = Path(__file__).parent / "generate_schematic_ai.py"
    if not ai_script.is_file():
        print(f"Error: AI generation script not found: {ai_script}", file=sys.stderr)
        sys.exit(1)

    cmd = _build_command(args, ai_script)

    # The backend resolves its key from OPENROUTER_API_KEY when no explicit
    # key is supplied, so exporting it here covers both the --api-key flag
    # and the inherited-environment case without exposing the secret in argv.
    child_env = {**os.environ, "OPENROUTER_API_KEY": api_key}

    # Execute; only the launch itself is guarded so that the SystemExit
    # carrying the child's return code is never intercepted.
    try:
        result = subprocess.run(cmd, env=child_env, check=False)
    except (OSError, ValueError) as e:
        print(f"Error executing AI generation: {e}", file=sys.stderr)
        sys.exit(1)

    sys.exit(result.returncode)
def _find_env_file(start: Path, max_levels: int = 5) -> Optional[Path]:
    """Return the nearest regular ``.env`` file at or above *start*.

    Args:
        start: Directory where the upward search begins.
        max_levels: Maximum number of directories to inspect, counting
            *start* itself.

    Returns:
        Path of the first ``.env`` regular file found, or ``None``.
    """
    current = start
    for _ in range(max_levels):
        candidate = current / ".env"
        # is_file() rather than exists(): a *directory* called ".env" (a
        # common virtualenv name) must not be mistaken for a dotenv file.
        if candidate.is_file():
            return candidate
        # Stop only after the filesystem root itself has been inspected.
        if current.parent == current:
            break
        current = current.parent
    return None


# Try to load .env file from multiple potential locations
def _load_env_file():
    """Load a .env file from the working directory tree or the script's tree.

    Search order: the current working directory and its parents, then the
    directory containing this file and its parents (five directories each).
    The first ``.env`` regular file found is passed to
    ``dotenv.load_dotenv`` with ``override=False``, so variables already
    present in the environment are not replaced.

    Returns:
        True if a .env file was found and handed to ``load_dotenv``; False
        if none was found or python-dotenv is not installed.
    """
    try:
        from dotenv import load_dotenv
    except ImportError:
        return False  # python-dotenv not installed

    for start in (Path.cwd(), Path(__file__).resolve().parent):
        env_path = _find_env_file(start)
        if env_path is not None:
            load_dotenv(dotenv_path=env_path, override=False)
            return True

    return False
- good quality + "default": 7.5, # Default threshold + } + + # Scientific diagram best practices prompt template + SCIENTIFIC_DIAGRAM_GUIDELINES = """ +Create a high-quality scientific diagram with these requirements: + +VISUAL QUALITY: +- Clean white or light background (no textures or gradients) +- High contrast for readability and printing +- Professional, publication-ready appearance +- Sharp, clear lines and text +- Adequate spacing between elements to prevent crowding + +TYPOGRAPHY: +- Clear, readable sans-serif fonts (Arial, Helvetica style) +- Minimum 10pt font size for all labels +- Consistent font sizes throughout +- All text horizontal or clearly readable +- No overlapping text + +SCIENTIFIC STANDARDS: +- Accurate representation of concepts +- Clear labels for all components +- Include scale bars, legends, or axes where appropriate +- Use standard scientific notation and symbols +- Include units where applicable + +ACCESSIBILITY: +- Colorblind-friendly color palette (use Okabe-Ito colors if using color) +- High contrast between elements +- Redundant encoding (shapes + colors, not just colors) +- Works well in grayscale + +LAYOUT: +- Logical flow (left-to-right or top-to-bottom) +- Clear visual hierarchy +- Balanced composition +- Appropriate use of whitespace +- No clutter or unnecessary decorative elements + +IMPORTANT - NO FIGURE NUMBERS: +- Do NOT include "Figure 1:", "Fig. 1", or any figure numbering in the image +- Do NOT add captions or titles like "Figure: ..." at the top or bottom +- Figure numbers and captions are added separately in the document/LaTeX +- The diagram should contain only the visual content itself +""" + + def __init__(self, api_key: Optional[str] = None, verbose: bool = False): + """ + Initialize the generator. 
+ + Args: + api_key: OpenRouter API key (or use OPENROUTER_API_KEY env var) + verbose: Print detailed progress information + """ + # Priority: 1) explicit api_key param, 2) environment variable, 3) .env file + self.api_key = api_key or os.getenv("OPENROUTER_API_KEY") + + # If not found in environment, try loading from .env file + if not self.api_key: + _load_env_file() + self.api_key = os.getenv("OPENROUTER_API_KEY") + + if not self.api_key: + raise ValueError( + "OPENROUTER_API_KEY not found. Please either:\n" + " 1. Set the OPENROUTER_API_KEY environment variable\n" + " 2. Add OPENROUTER_API_KEY to your .env file\n" + " 3. Pass api_key parameter to the constructor\n" + "Get your API key from: https://openrouter.ai/keys" + ) + + self.verbose = verbose + self._last_error = None # Track last error for better reporting + self.base_url = "https://openrouter.ai/api/v1" + # Nano Banana Pro - Google's advanced image generation model + # https://openrouter.ai/google/gemini-3-pro-image-preview + self.image_model = "google/gemini-3-pro-image-preview" + # Gemini 3 Pro for quality review - excellent vision and reasoning + self.review_model = "google/gemini-3-pro" + + def _log(self, message: str): + """Log message if verbose mode is enabled.""" + if self.verbose: + print(f"[{time.strftime('%H:%M:%S')}] {message}") + + def _make_request(self, model: str, messages: List[Dict[str, Any]], + modalities: Optional[List[str]] = None) -> Dict[str, Any]: + """ + Make a request to OpenRouter API. 
def _extract_image_from_response(self, response: Dict[str, Any]) -> Optional[bytes]:
    """
    Extract a base64-encoded image from an API response.

    The message's 'images' field is checked first (that is where the image
    model places its output); the 'content' field, either a string with an
    embedded data URL or a list of content blocks, is used as a fallback.

    Args:
        response: API response dictionary

    Returns:
        Decoded image bytes, or None if no image could be found or decoded
    """
    import re

    def data_url_payload(candidate) -> Optional[str]:
        """Return the whitespace-free base64 part of a ``data:image`` URL, else None."""
        if isinstance(candidate, dict):
            candidate = candidate.get("url", "")
        if not isinstance(candidate, str) or not candidate.startswith("data:image"):
            return None
        _, comma, payload = candidate.partition(",")
        if not comma:
            return None
        return "".join(payload.split())

    try:
        choices = response.get("choices") or []
        if not choices:
            self._log("No choices in response")
            return None

        message = choices[0].get("message") or {}

        # Preferred location: the 'images' field of the message.
        images = message.get("images") or []
        if images:
            self._log(f"Found {len(images)} image(s) in 'images' field")
            # Take the first entry that actually carries a data URL instead
            # of giving up when images[0] happens to be unusable.
            for entry in images:
                if not isinstance(entry, dict) or entry.get("type") != "image_url":
                    continue
                base64_str = data_url_payload(entry.get("image_url", {}))
                if base64_str is not None:
                    self._log(f"Extracted base64 data (length: {len(base64_str)})")
                    return base64.b64decode(base64_str)

        # Fallback: check content field (for other models or future changes)
        content = message.get("content", "")

        if self.verbose:
            self._log(f"Content type: {type(content)}, length: {len(str(content))}")

        # String content with a data URL embedded somewhere in the text
        if isinstance(content, str) and "data:image" in content:
            match = re.search(r'data:image/[^;]+;base64,([A-Za-z0-9+/=\n\r]+)', content, re.DOTALL)
            if match:
                base64_str = "".join(match.group(1).split())
                self._log(f"Found image in content field (length: {len(base64_str)})")
                return base64.b64decode(base64_str)

        # List content: scan the blocks for an image_url entry
        if isinstance(content, list):
            for i, block in enumerate(content):
                if not isinstance(block, dict) or block.get("type") != "image_url":
                    continue
                base64_str = data_url_payload(block.get("image_url", {}))
                if base64_str is not None:
                    self._log(f"Found image in content block {i}")
                    return base64.b64decode(base64_str)

        self._log("No image data found in response")
        return None

    except (AttributeError, IndexError, KeyError, TypeError, ValueError) as e:
        # Malformed response structure or invalid base64 (binascii.Error is
        # a ValueError): extraction is best-effort, so report and return None.
        self._log(f"Error extracting image: {str(e)}")
        if self.verbose:
            import traceback
            traceback.print_exc()
        return None
def review_image(self, image_path: str, original_prompt: str,
                 iteration: int, doc_type: str = "default",
                 max_iterations: int = 2) -> Tuple[str, float, bool]:
    """
    Review a generated image with the review model and score its quality.

    The image is sent together with a rubric prompt; the reply is parsed for
    a ``SCORE:`` value and a verdict. Review is best-effort: if the request
    or the parsing fails, the image is treated as acceptable so that a
    review problem never aborts generation.

    Args:
        image_path: Path to the generated image
        original_prompt: Original user prompt
        iteration: Current iteration number
        doc_type: Document type (journal, poster, presentation, etc.)
        max_iterations: Maximum iterations allowed

    Returns:
        Tuple of (critique text, quality score 0-10, needs_improvement bool).
        Every return path yields all three elements.
    """
    import re

    doc_type = doc_type or "default"
    image_data_url = self._image_to_base64(image_path)

    # Get quality threshold for this document type
    threshold = self.QUALITY_THRESHOLDS.get(doc_type.lower(),
                                            self.QUALITY_THRESHOLDS["default"])

    review_prompt = f"""You are an expert reviewer evaluating a scientific diagram for publication quality.

ORIGINAL REQUEST: {original_prompt}

DOCUMENT TYPE: {doc_type} (quality threshold: {threshold}/10)
ITERATION: {iteration}/{max_iterations}

Carefully evaluate this diagram on these criteria:

1. **Scientific Accuracy** (0-2 points)
   - Correct representation of concepts
   - Proper notation and symbols
   - Accurate relationships shown

2. **Clarity and Readability** (0-2 points)
   - Easy to understand at a glance
   - Clear visual hierarchy
   - No ambiguous elements

3. **Label Quality** (0-2 points)
   - All important elements labeled
   - Labels are readable (appropriate font size)
   - Consistent labeling style

4. **Layout and Composition** (0-2 points)
   - Logical flow (top-to-bottom or left-to-right)
   - Balanced use of space
   - No overlapping elements

5. **Professional Appearance** (0-2 points)
   - Publication-ready quality
   - Clean, crisp lines and shapes
   - Appropriate colors/contrast

RESPOND IN THIS EXACT FORMAT:
SCORE: [total score 0-10]

STRENGTHS:
- [strength 1]
- [strength 2]

ISSUES:
- [issue 1 if any]
- [issue 2 if any]

VERDICT: [ACCEPTABLE or NEEDS_IMPROVEMENT]

If score >= {threshold}, the diagram is ACCEPTABLE for {doc_type} publication.
If score < {threshold}, mark as NEEDS_IMPROVEMENT with specific suggestions."""

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": review_prompt},
                {"type": "image_url", "image_url": {"url": image_data_url}},
            ],
        }
    ]

    try:
        response = self._make_request(
            model=self.review_model,
            messages=messages
        )

        choices = response.get("choices", [])
        if not choices:
            # No review available: accept the image. The caller unpacks three
            # values, so the needs_improvement flag must be included here.
            return "Image generated successfully", 8.0, False

        message = choices[0].get("message", {})
        # The API may send "content": null; normalise so the regex search
        # below always receives text.
        content = message.get("content") or ""

        # Some models put their analysis in the 'reasoning' field instead
        reasoning = message.get("reasoning", "")
        if reasoning and not content:
            content = reasoning

        if isinstance(content, list):
            # Keep only the text blocks of a structured reply
            content = "\n".join(
                block.get("text", "")
                for block in content
                if isinstance(block, dict) and block.get("type") == "text"
            )

        # Look for "SCORE: X" / "SCORE: X/10" first, then any looser
        # score/rating/quality pattern.
        score = 7.5  # Default score if extraction fails
        score_match = re.search(r'SCORE:\s*(\d+(?:\.\d+)?)', content, re.IGNORECASE)
        if not score_match:
            score_match = re.search(
                r'(?:score|rating|quality)[:\s]+(\d+(?:\.\d+)?)\s*(?:/\s*10)?',
                content, re.IGNORECASE)
        if score_match:
            # Clamp so an out-of-range reply cannot leave the 0-10 scale
            score = min(10.0, max(0.0, float(score_match.group(1))))

        # Improvement is needed if the reviewer says so or the score is low
        needs_improvement = "NEEDS_IMPROVEMENT" in content.upper() or score < threshold

        self._log(f"✓ Review complete (Score: {score}/10, Threshold: {threshold}/10)")
        self._log(f"  Verdict: {'Needs improvement' if needs_improvement else 'Acceptable'}")

        return (content if content else "Image generated successfully",
                score,
                needs_improvement)
    except Exception as e:
        self._log(f"Review skipped: {str(e)}")
        # Don't fail the whole process if review fails - assume acceptable
        return "Image generated successfully (review skipped)", 7.5, False
+ + Returns: + Dictionary with generation results and metadata + """ + output_path = Path(output_path) + output_dir = output_path.parent + output_dir.mkdir(parents=True, exist_ok=True) + + base_name = output_path.stem + extension = output_path.suffix or ".png" + + # Get quality threshold for this document type + threshold = self.QUALITY_THRESHOLDS.get(doc_type.lower(), + self.QUALITY_THRESHOLDS["default"]) + + results = { + "user_prompt": user_prompt, + "doc_type": doc_type, + "quality_threshold": threshold, + "iterations": [], + "final_image": None, + "final_score": 0.0, + "success": False, + "early_stop": False, + "early_stop_reason": None + } + + current_prompt = f"""{self.SCIENTIFIC_DIAGRAM_GUIDELINES} + +USER REQUEST: {user_prompt} + +Generate a publication-quality scientific diagram that meets all the guidelines above.""" + + print(f"\n{'='*60}") + print(f"Generating Scientific Schematic") + print(f"{'='*60}") + print(f"Description: {user_prompt}") + print(f"Document Type: {doc_type}") + print(f"Quality Threshold: {threshold}/10") + print(f"Max Iterations: {iterations}") + print(f"Output: {output_path}") + print(f"{'='*60}\n") + + for i in range(1, iterations + 1): + print(f"\n[Iteration {i}/{iterations}]") + print("-" * 40) + + # Generate image + print(f"Generating image...") + image_data = self.generate_image(current_prompt) + + if not image_data: + error_msg = getattr(self, '_last_error', 'Image generation failed - no image data returned') + print(f"✗ Generation failed: {error_msg}") + results["iterations"].append({ + "iteration": i, + "success": False, + "error": error_msg + }) + continue + + # Save iteration image + iter_path = output_dir / f"{base_name}_v{i}{extension}" + with open(iter_path, "wb") as f: + f.write(image_data) + print(f"✓ Saved: {iter_path}") + + # Review image using Gemini 3 Pro + print(f"Reviewing image with Gemini 3 Pro...") + critique, score, needs_improvement = self.review_image( + str(iter_path), user_prompt, i, doc_type, 
iterations + ) + print(f"✓ Score: {score}/10 (threshold: {threshold}/10)") + + # Save iteration results + iteration_result = { + "iteration": i, + "image_path": str(iter_path), + "prompt": current_prompt, + "critique": critique, + "score": score, + "needs_improvement": needs_improvement, + "success": True + } + results["iterations"].append(iteration_result) + + # Check if quality is acceptable - STOP EARLY if so + if not needs_improvement: + print(f"\n✓ Quality meets {doc_type} threshold ({score} >= {threshold})") + print(f" No further iterations needed!") + results["final_image"] = str(iter_path) + results["final_score"] = score + results["success"] = True + results["early_stop"] = True + results["early_stop_reason"] = f"Quality score {score} meets threshold {threshold} for {doc_type}" + break + + # If this is the last iteration, we're done regardless + if i == iterations: + print(f"\n⚠ Maximum iterations reached") + results["final_image"] = str(iter_path) + results["final_score"] = score + results["success"] = True + break + + # Quality below threshold - improve prompt for next iteration + print(f"\n⚠ Quality below threshold ({score} < {threshold})") + print(f"Improving prompt based on feedback...") + current_prompt = self.improve_prompt(user_prompt, critique, i + 1) + + # Copy final version to output path + if results["success"] and results["final_image"]: + final_iter_path = Path(results["final_image"]) + if final_iter_path != output_path: + import shutil + shutil.copy(final_iter_path, output_path) + print(f"\n✓ Final image: {output_path}") + + # Save review log + log_path = output_dir / f"{base_name}_review_log.json" + with open(log_path, "w") as f: + json.dump(results, f, indent=2) + print(f"✓ Review log: {log_path}") + + print(f"\n{'='*60}") + print(f"Generation Complete!") + print(f"Final Score: {results['final_score']}/10") + if results["early_stop"]: + print(f"Iterations Used: {len([r for r in results['iterations'] if r.get('success')])}/{iterations} 
def main():
    """Command-line interface.

    Parses the arguments, validates ``--iterations``, builds the generator
    and runs the iterative generation. The API key is resolved by the
    generator's constructor (explicit ``--api-key``, then the
    OPENROUTER_API_KEY environment variable, then a ``.env`` file), so no
    separate environment check is done here: such a check would exit before
    the ``.env`` fallback could run.

    Exits with status 0 on success and 1 on any failure.
    """
    parser = argparse.ArgumentParser(
        description="Generate scientific schematics using AI with smart iterative refinement",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Generate a flowchart for a journal paper
  python generate_schematic_ai.py "CONSORT participant flow diagram" -o flowchart.png --doc-type journal

  # Generate neural network architecture for presentation (lower threshold)
  python generate_schematic_ai.py "Transformer encoder-decoder architecture" -o transformer.png --doc-type presentation

  # Generate with custom max iterations for poster
  python generate_schematic_ai.py "Biological signaling pathway" -o pathway.png --iterations 2 --doc-type poster

  # Verbose output
  python generate_schematic_ai.py "Circuit diagram" -o circuit.png -v

Document Types (quality thresholds):
  journal       8.5/10  - Nature, Science, peer-reviewed journals
  conference    8.0/10  - Conference papers
  thesis        8.0/10  - Dissertations, theses
  grant         8.0/10  - Grant proposals
  preprint      7.5/10  - arXiv, bioRxiv, etc.
  report        7.5/10  - Technical reports
  poster        7.0/10  - Academic posters
  presentation  6.5/10  - Slides, talks
  default       7.5/10  - General purpose

Note: Multiple iterations only occur if quality is BELOW the threshold.
      If the first generation meets the threshold, no extra API calls are made.

Environment:
  OPENROUTER_API_KEY    OpenRouter API key (required)
        """
    )

    parser.add_argument("prompt", help="Description of the diagram to generate")
    parser.add_argument("-o", "--output", required=True,
                        help="Output image path (e.g., diagram.png)")
    parser.add_argument("--iterations", type=int, default=2,
                        help="Maximum refinement iterations (default: 2, max: 2)")
    parser.add_argument("--doc-type", default="default",
                        choices=["journal", "conference", "poster", "presentation",
                                 "report", "grant", "thesis", "preprint", "default"],
                        help="Document type for quality threshold (default: default)")
    parser.add_argument("--api-key", help="OpenRouter API key (or set OPENROUTER_API_KEY)")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Verbose output")

    args = parser.parse_args()

    # Validate iterations - enforce max of 2 (cheap check, done before any setup)
    if not 1 <= args.iterations <= 2:
        print("Error: Iterations must be between 1 and 2")
        sys.exit(1)

    try:
        # api_key may be None here; the constructor then falls back to the
        # environment variable and finally to a .env file.
        generator = ScientificSchematicGenerator(api_key=args.api_key, verbose=args.verbose)
    except ValueError as e:
        # Raised by the constructor when no key can be found anywhere
        print(f"Error: {e}")
        print("\nOr provide via --api-key flag")
        sys.exit(1)

    try:
        results = generator.generate_iterative(
            user_prompt=args.prompt,
            output_path=args.output,
            iterations=args.iterations,
            doc_type=args.doc_type
        )
    except Exception as e:
        # Top-level boundary: report any failure and exit non-zero
        print(f"\n✗ Error: {str(e)}")
        sys.exit(1)

    if not results["success"]:
        print(f"\n✗ Generation failed. Check review log for details.")
        sys.exit(1)

    print(f"\n✓ Success! Image saved to: {args.output}")
    if results.get("early_stop"):
        completed = sum(1 for r in results["iterations"] if r.get("success"))
        print(f"  (Completed in {completed} iteration(s) - quality threshold met)")
    sys.exit(0)
def test_error_handling():
    """Test error handling for missing API key.

    Temporarily removes OPENROUTER_API_KEY from the environment, checks that
    constructing the generator without a key raises a ValueError that names
    the variable, and always puts the environment back the way it was, on
    success, on failure and on unexpected errors alike.

    Returns:
        True if the expected ValueError was raised, False otherwise.
    """
    print("\nTesting error handling...")

    # Clear environment variable, remembering the previous value (if any)
    old_key = os.environ.pop("OPENROUTER_API_KEY", None)
    try:
        from generate_schematic_ai import ScientificSchematicGenerator

        # Try to initialize without key
        try:
            ScientificSchematicGenerator()
        except ValueError as e:
            if "OPENROUTER_API_KEY" in str(e):
                print("✓ Correctly raises ValueError for missing API key")
                return True
            print(f"✗ Wrong error message: {e}")
            return False

        print("✗ Should have raised ValueError for missing API key")
        return False
    except Exception as e:
        print(f"✗ Error handling test failed: {e}")
        return False
    finally:
        # Restore the exact pre-test state: the constructor may have loaded
        # a key from a .env file into the process environment.
        if old_key is None:
            os.environ.pop("OPENROUTER_API_KEY", None)
        else:
            os.environ["OPENROUTER_API_KEY"] = old_key
verbose=False) + + # Test improve_prompt method + original = "Create a flowchart" + critique = "Add more spacing between boxes" + improved = generator.improve_prompt(original, critique, 2) + + if not improved: + print("✗ improve_prompt returned empty string") + return False + + if original not in improved: + print("✗ Improved prompt doesn't include original") + return False + + if critique not in improved: + print("✗ Improved prompt doesn't include critique") + return False + + if "ITERATION 2" not in improved: + print("✗ Improved prompt doesn't include iteration number") + return False + + print("✓ Prompt engineering works correctly") + print(f" Original length: {len(original)} chars") + print(f" Improved length: {len(improved)} chars") + + return True + except Exception as e: + print(f"✗ Prompt engineering test failed: {e}") + return False + +def test_file_paths(): + """Test that all required files exist.""" + print("\nTesting file structure...") + + base_dir = Path(__file__).parent + required_files = [ + "scripts/generate_schematic_ai.py", + "scripts/generate_schematic.py", + "SKILL.md", + "README.md" + ] + + all_exist = True + for file_path in required_files: + full_path = base_dir / file_path + if full_path.exists(): + print(f"✓ {file_path}") + else: + print(f"✗ Missing: {file_path}") + all_exist = False + + return all_exist + +def main(): + """Run all tests.""" + print("="*60) + print("Scientific Schematics AI Generation - Verification Tests") + print("="*60) + + tests = [ + ("File Structure", test_file_paths), + ("Imports", test_imports), + ("Class Structure", test_class_structure), + ("Error Handling", test_error_handling), + ("Wrapper Script", test_wrapper_script), + ("Prompt Engineering", test_prompt_engineering), + ] + + results = [] + for test_name, test_func in tests: + try: + result = test_func() + results.append((test_name, result)) + except Exception as e: + print(f"\n✗ Test '{test_name}' crashed: {e}") + results.append((test_name, False)) + + # 
Summary + print("\n" + "="*60) + print("Test Summary") + print("="*60) + + passed = sum(1 for _, result in results if result) + total = len(results) + + for test_name, result in results: + status = "✓ PASS" if result else "✗ FAIL" + print(f"{status}: {test_name}") + + print(f"\nTotal: {passed}/{total} tests passed") + + if passed == total: + print("\n✓ All tests passed! Implementation verified.") + print("\nNext steps:") + print("1. Set OPENROUTER_API_KEY environment variable") + print("2. Test with actual API call:") + print(" python scripts/generate_schematic.py 'test diagram' -o test.png") + return 0 + else: + print(f"\n✗ {total - passed} test(s) failed. Please review errors above.") + return 1 + +if __name__ == "__main__": + sys.exit(main()) +