Update all the latest writing skills

This commit is contained in:
Vinayak Agarwal
2025-12-12 11:42:41 -08:00
parent c85faf039a
commit cf1d4aac5d
30 changed files with 5895 additions and 2983 deletions

View File

@@ -1,317 +1,228 @@
#!/usr/bin/env python3
"""
Batch conversion utility for MarkItDown.
Batch convert multiple files to Markdown using MarkItDown.
Converts all supported files in a directory to Markdown format.
This script demonstrates how to efficiently convert multiple files
in a directory to Markdown format.
"""
import os
import sys
from pathlib import Path
from markitdown import MarkItDown
from typing import Optional, List
import argparse
from pathlib import Path
from typing import List, Optional
from markitdown import MarkItDown
from concurrent.futures import ThreadPoolExecutor, as_completed
import sys
# File extensions this script treats as convertible.
# find_files() turns each entry into a "*<ext>" glob pattern, and batch_convert()
# prints the sorted set when no matching files are found.
# Entries are lowercase and include the leading dot.
SUPPORTED_EXTENSIONS = {
'.pdf', '.docx', '.pptx', '.xlsx', '.xls',
'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff',
'.wav', '.mp3', '.flac', '.ogg', '.aiff',
'.html', '.htm', '.epub',
'.csv', '.json', '.xml',
'.zip'
}
def setup_markitdown(
    use_llm: bool = False,
    llm_model: str = "gpt-4o",
    use_azure_di: bool = False,
    azure_endpoint: Optional[str] = None,
    azure_key: Optional[str] = None
) -> MarkItDown:
    """
    Build a MarkItDown instance, enabling optional integrations when possible.

    Every optional feature is best-effort: if it cannot be enabled, a warning
    is printed and a plain MarkItDown instance (without that feature) is
    still returned.

    Args:
        use_llm: Enable LLM-powered image descriptions
        llm_model: LLM model to use (default: gpt-4o)
        use_azure_di: Enable Azure Document Intelligence
        azure_endpoint: Azure Document Intelligence endpoint
        azure_key: Azure Document Intelligence API key

    Returns:
        Configured MarkItDown instance
    """
    kwargs = {}

    if use_llm:
        try:
            from openai import OpenAI
        except ImportError:
            print("✗ Warning: OpenAI not installed, LLM features disabled")
            print(" Install with: pip install openai")
        else:
            # Constructing the client can fail on its own (e.g. no API key
            # configured). Treat that like a missing package instead of
            # aborting the whole batch with a traceback.
            try:
                client = OpenAI()
            except Exception as exc:
                print(f"✗ Warning: Could not create OpenAI client ({exc}), LLM features disabled")
            else:
                kwargs['llm_client'] = client
                kwargs['llm_model'] = llm_model
                print(f"✓ LLM integration enabled ({llm_model})")

    if use_azure_di:
        # Both endpoint and key are required; one without the other is useless.
        if azure_endpoint and azure_key:
            kwargs['docintel_endpoint'] = azure_endpoint
            kwargs['docintel_key'] = azure_key
            print("✓ Azure Document Intelligence enabled")
        else:
            print("✗ Warning: Azure credentials not provided, Azure DI disabled")

    return MarkItDown(**kwargs)
def convert_file(
md: MarkItDown,
input_path: Path,
output_dir: Path,
verbose: bool = False
) -> bool:
def convert_file(md: MarkItDown, file_path: Path, output_dir: Path, verbose: bool = False) -> tuple[bool, str, str]:
"""
Convert a single file to Markdown.
Args:
md: MarkItDown instance
input_path: Path to input file
file_path: Path to input file
output_dir: Directory for output files
verbose: Print detailed progress
verbose: Print detailed messages
Returns:
True if successful, False otherwise
Tuple of (success, input_path, message)
"""
try:
if verbose:
print(f" Processing: {input_path.name}")
# Convert file
result = md.convert(str(input_path))
# Create output filename
output_filename = input_path.stem + '.md'
output_path = output_dir / output_filename
# Write output
with open(output_path, 'w', encoding='utf-8') as f:
f.write(result.text_content)
if verbose:
print(f" ✓ Converted: {input_path.name}{output_filename}")
return True
print(f"Converting: {file_path}")
result = md.convert(str(file_path))
# Create output path
output_file = output_dir / f"{file_path.stem}.md"
# Write content with metadata header
content = f"# {result.title or file_path.stem}\n\n"
content += f"**Source**: {file_path.name}\n"
content += f"**Format**: {file_path.suffix}\n\n"
content += "---\n\n"
content += result.text_content
output_file.write_text(content, encoding='utf-8')
return True, str(file_path), f"✓ Converted to {output_file.name}"
except Exception as e:
print(f" ✗ Error converting {input_path.name}: {e}")
return False
def find_files(input_dir: Path, recursive: bool = False) -> List[Path]:
    """
    Collect the paths in a directory that match a supported extension.

    Args:
        input_dir: Directory to search
        recursive: Also descend into subdirectories

    Returns:
        Sorted list of matching paths
    """
    # rglob walks the whole tree; glob only looks at the top level.
    search = input_dir.rglob if recursive else input_dir.glob
    matches = [
        path
        for ext in SUPPORTED_EXTENSIONS
        for path in search(f"*{ext}")
    ]
    matches.sort()
    return matches
return False, str(file_path), f"✗ Error: {str(e)}"
def batch_convert(
input_dir: str,
output_dir: str,
input_dir: Path,
output_dir: Path,
extensions: Optional[List[str]] = None,
recursive: bool = False,
use_llm: bool = False,
llm_model: str = "gpt-4o",
use_azure_di: bool = False,
azure_endpoint: Optional[str] = None,
azure_key: Optional[str] = None,
verbose: bool = False
) -> None:
workers: int = 4,
verbose: bool = False,
enable_plugins: bool = False
) -> dict:
"""
Batch convert all supported files in a directory.
Batch convert files in a directory.
Args:
input_dir: Input directory containing files
output_dir: Output directory for Markdown files
input_dir: Input directory
output_dir: Output directory
extensions: List of file extensions to convert (e.g., ['.pdf', '.docx'])
recursive: Search subdirectories
use_llm: Enable LLM-powered descriptions
llm_model: LLM model to use
use_azure_di: Enable Azure Document Intelligence
azure_endpoint: Azure DI endpoint
azure_key: Azure DI API key
verbose: Print detailed progress
workers: Number of parallel workers
verbose: Print detailed messages
enable_plugins: Enable MarkItDown plugins
Returns:
Dictionary with conversion statistics
"""
input_path = Path(input_dir)
output_path = Path(output_dir)
# Validate input directory
if not input_path.exists():
print(f"✗ Error: Input directory '{input_dir}' does not exist")
sys.exit(1)
if not input_path.is_dir():
print(f"✗ Error: '{input_dir}' is not a directory")
sys.exit(1)
# Create output directory
output_path.mkdir(parents=True, exist_ok=True)
# Setup MarkItDown
print("Setting up MarkItDown...")
md = setup_markitdown(
use_llm=use_llm,
llm_model=llm_model,
use_azure_di=use_azure_di,
azure_endpoint=azure_endpoint,
azure_key=azure_key
)
output_dir.mkdir(parents=True, exist_ok=True)
# Default extensions if not specified
if extensions is None:
extensions = ['.pdf', '.docx', '.pptx', '.xlsx', '.html', '.jpg', '.png']
# Find files
print(f"\nScanning directory: {input_dir}")
files = []
if recursive:
print(" (including subdirectories)")
files = find_files(input_path, recursive)
for ext in extensions:
files.extend(input_dir.rglob(f"*{ext}"))
else:
for ext in extensions:
files.extend(input_dir.glob(f"*{ext}"))
if not files:
print("No supported files found")
print(f" Supported extensions: {', '.join(sorted(SUPPORTED_EXTENSIONS))}")
sys.exit(0)
print(f"✓ Found {len(files)} file(s) to convert\n")
# Convert files
successful = 0
failed = 0
for file_path in files:
if convert_file(md, file_path, output_path, verbose):
successful += 1
else:
failed += 1
# Summary
print(f"\n{'='*60}")
print(f"Conversion complete!")
print(f" Successful: {successful}")
print(f" Failed: {failed}")
print(f" Output: {output_dir}")
print(f"{'='*60}")
print(f"No files found with extensions: {', '.join(extensions)}")
return {'total': 0, 'success': 0, 'failed': 0}
print(f"Found {len(files)} file(s) to convert")
# Create MarkItDown instance
md = MarkItDown(enable_plugins=enable_plugins)
# Convert files in parallel
results = {
'total': len(files),
'success': 0,
'failed': 0,
'details': []
}
with ThreadPoolExecutor(max_workers=workers) as executor:
futures = {
executor.submit(convert_file, md, file_path, output_dir, verbose): file_path
for file_path in files
}
for future in as_completed(futures):
success, path, message = future.result()
if success:
results['success'] += 1
else:
results['failed'] += 1
results['details'].append({
'file': path,
'success': success,
'message': message
})
print(message)
return results
def main():
"""Main entry point."""
parser = argparse.ArgumentParser(
description="Batch convert files to Markdown using MarkItDown",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Basic usage
python batch_convert.py documents/ output/
# Recursive conversion
python batch_convert.py documents/ output/ --recursive
# With LLM-powered image descriptions
python batch_convert.py documents/ output/ --llm
# With Azure Document Intelligence
python batch_convert.py documents/ output/ --azure \\
--azure-endpoint https://example.cognitiveservices.azure.com/ \\
--azure-key YOUR-KEY
# All features enabled
python batch_convert.py documents/ output/ --llm --azure \\
--azure-endpoint $AZURE_ENDPOINT --azure-key $AZURE_KEY
Supported file types:
Documents: PDF, DOCX, PPTX, XLSX, XLS
Images: JPG, PNG, GIF, BMP, TIFF
Audio: WAV, MP3, FLAC, OGG, AIFF
Web: HTML, EPUB
Data: CSV, JSON, XML
Archives: ZIP
# Convert all PDFs in a directory
python batch_convert.py papers/ output/ --extensions .pdf
# Convert multiple formats recursively
python batch_convert.py documents/ markdown/ --extensions .pdf .docx .pptx -r
# Use 8 parallel workers
python batch_convert.py input/ output/ --workers 8
# Enable plugins
python batch_convert.py input/ output/ --plugins
"""
)
parser.add_argument('input_dir', type=Path, help='Input directory')
parser.add_argument('output_dir', type=Path, help='Output directory')
parser.add_argument(
'input_dir',
help='Input directory containing files to convert'
'--extensions', '-e',
nargs='+',
help='File extensions to convert (e.g., .pdf .docx)'
)
parser.add_argument(
'output_dir',
help='Output directory for Markdown files'
)
parser.add_argument(
'-r', '--recursive',
'--recursive', '-r',
action='store_true',
help='Recursively search subdirectories'
help='Search subdirectories recursively'
)
parser.add_argument(
'--llm',
'--workers', '-w',
type=int,
default=4,
help='Number of parallel workers (default: 4)'
)
parser.add_argument(
'--verbose', '-v',
action='store_true',
help='Enable LLM-powered image descriptions (requires OpenAI API key)'
help='Verbose output'
)
parser.add_argument(
'--llm-model',
default='gpt-4o',
help='LLM model to use (default: gpt-4o)'
)
parser.add_argument(
'--azure',
'--plugins', '-p',
action='store_true',
help='Enable Azure Document Intelligence for PDFs'
help='Enable MarkItDown plugins'
)
parser.add_argument(
'--azure-endpoint',
help='Azure Document Intelligence endpoint URL'
)
parser.add_argument(
'--azure-key',
help='Azure Document Intelligence API key'
)
parser.add_argument(
'-v', '--verbose',
action='store_true',
help='Print detailed progress'
)
args = parser.parse_args()
# Environment variable fallbacks for Azure
azure_endpoint = args.azure_endpoint or os.getenv('AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT')
azure_key = args.azure_key or os.getenv('AZURE_DOCUMENT_INTELLIGENCE_KEY')
batch_convert(
# Validate input directory
if not args.input_dir.exists():
print(f"Error: Input directory '{args.input_dir}' does not exist")
sys.exit(1)
if not args.input_dir.is_dir():
print(f"Error: '{args.input_dir}' is not a directory")
sys.exit(1)
# Run batch conversion
results = batch_convert(
input_dir=args.input_dir,
output_dir=args.output_dir,
extensions=args.extensions,
recursive=args.recursive,
use_llm=args.llm,
llm_model=args.llm_model,
use_azure_di=args.azure,
azure_endpoint=azure_endpoint,
azure_key=azure_key,
verbose=args.verbose
workers=args.workers,
verbose=args.verbose,
enable_plugins=args.plugins
)
# Print summary
print("\n" + "="*50)
print("CONVERSION SUMMARY")
print("="*50)
print(f"Total files: {results['total']}")
print(f"Successful: {results['success']}")
print(f"Failed: {results['failed']}")
print(f"Success rate: {results['success']/results['total']*100:.1f}%" if results['total'] > 0 else "N/A")
# Show failed files if any
if results['failed'] > 0:
print("\nFailed conversions:")
for detail in results['details']:
if not detail['success']:
print(f" - {detail['file']}: {detail['message']}")
sys.exit(0 if results['failed'] == 0 else 1)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,283 @@
#!/usr/bin/env python3
"""
Convert scientific literature PDFs to Markdown for analysis and review.
This script is specifically designed for converting academic papers,
organizing them, and preparing them for literature review workflows.
"""
import argparse
import json
import re
import sys
from pathlib import Path
from typing import List, Dict, Optional
from markitdown import MarkItDown
from datetime import datetime
def extract_metadata_from_filename(filename: str) -> Dict[str, str]:
    """
    Try to extract metadata from a filename.

    Supports patterns like: Author_Year_Title.pdf (underscores or dashes
    as separators).

    Args:
        filename: File name or path; the extension is ignored.

    Returns:
        Dict with any of the keys 'year', 'author' and 'title' that could be
        derived. 'title' is omitted when nothing but author/year remains
        (e.g. "Smith_2023.pdf").
    """
    metadata: Dict[str, str] = {}

    # Remove extension
    name = Path(filename).stem

    # A four-digit year (19xx/20xx) not embedded in a longer number.
    # Digit lookarounds are used instead of \b because "_" counts as a word
    # character, so \b never matches inside "Author_2023_Title".
    year_match = re.search(r'(?<!\d)(?:19|20)\d{2}(?!\d)', name)
    if year_match:
        metadata['year'] = year_match.group()

    # Split by underscores or dashes; drop empty pieces from doubled separators
    parts = [part for part in re.split(r'[_\-]', name) if part]

    if len(parts) >= 2:
        metadata['author'] = parts[0]
        title_parts = parts[1:]
        # The year is reported separately, so keep it out of the title.
        year = metadata.get('year')
        if year is not None and year in title_parts:
            title_parts.remove(year)
        if title_parts:
            metadata['title'] = ' '.join(title_parts)
    else:
        metadata['title'] = parts[0] if parts else name

    return metadata
def convert_paper(
    md: MarkItDown,
    input_file: Path,
    output_dir: Path,
    organize_by_year: bool = False
) -> tuple[bool, Dict]:
    """
    Convert a single paper to Markdown with metadata extraction.

    The output file starts with a YAML front-matter block, followed by a
    title heading, a "Document Information" section and the converted text.

    Args:
        md: MarkItDown instance
        input_file: Path to PDF file
        output_dir: Output directory
        organize_by_year: Write into a "<year>/" subdirectory when a year
            could be extracted from the filename

    Returns:
        Tuple of (success, metadata_dict). On failure the dict holds only
        'source_file' and 'error'.
    """
    try:
        print(f"Converting: {input_file.name}")

        # Convert to Markdown
        result = md.convert(str(input_file))

        # Extract metadata from filename
        metadata = extract_metadata_from_filename(input_file.name)
        metadata['source_file'] = input_file.name
        metadata['converted_date'] = datetime.now().isoformat()

        # Fall back to the document's own title if the filename had none
        if 'title' not in metadata and result.title:
            metadata['title'] = result.title
        title = metadata.get('title', input_file.stem)

        # Create output path
        output_subdir = output_dir
        if organize_by_year and 'year' in metadata:
            output_subdir = output_dir / metadata['year']
        output_subdir.mkdir(parents=True, exist_ok=True)
        output_file = output_subdir / f"{input_file.stem}.md"

        def quoted(value: str) -> str:
            # json.dumps yields a double-quoted scalar with quotes and
            # backslashes escaped, so arbitrary titles and filenames cannot
            # break the front matter.
            return json.dumps(value, ensure_ascii=False)

        # Front matter
        lines = ["---", f"title: {quoted(title)}"]
        if 'author' in metadata:
            lines.append(f"author: {quoted(metadata['author'])}")
        if 'year' in metadata:
            lines.append(f"year: {metadata['year']}")
        lines.append(f"source: {quoted(metadata['source_file'])}")
        lines.append(f"converted: {quoted(metadata['converted_date'])}")
        lines += ["---", ""]

        # Title and metadata section
        lines += [f"# {title}", "", "## Document Information", ""]
        if 'author' in metadata:
            lines.append(f"**Author**: {metadata['author']}")
        if 'year' in metadata:
            lines.append(f"**Year**: {metadata['year']}")
        lines.append(f"**Source File**: {metadata['source_file']}")
        lines.append(f"**Converted**: {metadata['converted_date']}")
        lines += ["", "---", ""]

        # Converted document body
        lines.append(result.text_content)

        # Write to file
        output_file.write_text("\n".join(lines), encoding='utf-8')

        print(f"✓ Saved to: {output_file}")
        return True, metadata

    except Exception as e:
        # One bad paper must not stop the batch; report it and carry on.
        print(f"✗ Error converting {input_file.name}: {str(e)}")
        return False, {'source_file': input_file.name, 'error': str(e)}
def create_index(papers: List[Dict], output_dir: Path):
    """
    Create an index/catalog of all converted papers.

    Writes two files into output_dir (created if missing):
      - INDEX.md: papers grouped by year, each with a link to its Markdown file
      - catalog.json: the metadata dicts, sorted by year and title

    Args:
        papers: Metadata dicts; the keys read here are 'year', 'title',
            'author', 'source_file' and 'error'. Entries with an 'error' key
            are listed with the error message instead of a link.
        output_dir: Directory that receives INDEX.md and catalog.json
    """
    from urllib.parse import quote

    # Sort by year (entries without a year sort last) and title
    papers_sorted = sorted(
        papers,
        key=lambda x: (x.get('year', '9999'), x.get('title', ''))
    )

    # May not exist yet if every conversion failed
    output_dir.mkdir(parents=True, exist_ok=True)

    # Group by year
    by_year = {}
    for paper in papers_sorted:
        by_year.setdefault(paper.get('year', 'Unknown'), []).append(paper)

    # Create Markdown index
    lines = [
        "# Literature Review Index",
        "",
        f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"**Total Papers**: {len(papers)}",
        "",
        "---",
        "",
    ]

    for year in sorted(by_year):
        lines += [f"## {year}", ""]

        for paper in by_year[year]:
            title = paper.get('title', paper.get('source_file', 'Unknown'))
            author = paper.get('author', 'Unknown Author')
            source = paper.get('source_file', '')

            lines.append(f"- **{title}**")
            lines.append(f"  - Author: {author}")
            lines.append(f"  - Source: {source}")

            if 'error' in paper:
                # No Markdown file was written for a failed conversion
                lines.append(f"  - Conversion failed: {paper['error']}")
            else:
                md_name = Path(source).stem + ".md"
                link = Path(md_name)
                # Files only live in a year subdirectory when the converter
                # was asked to organize by year, so check where it really is.
                paper_year = paper.get('year')
                if paper_year and (output_dir / paper_year / md_name).is_file():
                    link = Path(paper_year) / md_name
                # Percent-encode so names with spaces still form a valid link
                lines.append(f"  - [Read Markdown]({quote(link.as_posix())})")

            lines.append("")

    # Write index
    index_file = output_dir / "INDEX.md"
    index_file.write_text("\n".join(lines) + "\n", encoding='utf-8')
    print(f"\n✓ Created index: {index_file}")

    # Also create JSON catalog
    catalog_file = output_dir / "catalog.json"
    with open(catalog_file, 'w', encoding='utf-8') as f:
        json.dump(papers_sorted, f, indent=2, ensure_ascii=False)
    print(f"✓ Created catalog: {catalog_file}")
def main():
    """
    Command-line entry point: convert every PDF in a directory to Markdown.

    Exits with status 0 when all papers converted, 1 when the input directory
    is invalid, no PDFs were found, or at least one conversion failed.
    """
    parser = argparse.ArgumentParser(
        description="Convert scientific literature PDFs to Markdown",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Convert all PDFs in a directory
python convert_literature.py papers/ output/
# Organize by year
python convert_literature.py papers/ output/ --organize-by-year
# Create index of all papers
python convert_literature.py papers/ output/ --create-index
Filename Conventions:
For best results, name your PDFs using this pattern:
Author_Year_Title.pdf
Examples:
Smith_2023_Machine_Learning_Applications.pdf
Jones_2022_Climate_Change_Analysis.pdf
"""
    )

    parser.add_argument('input_dir', type=Path, help='Directory with PDF files')
    parser.add_argument('output_dir', type=Path, help='Output directory for Markdown files')
    parser.add_argument(
        '--organize-by-year', '-y',
        action='store_true',
        help='Organize output into year subdirectories'
    )
    parser.add_argument(
        '--create-index', '-i',
        action='store_true',
        help='Create an index/catalog of all papers'
    )
    parser.add_argument(
        '--recursive', '-r',
        action='store_true',
        help='Search subdirectories recursively'
    )

    args = parser.parse_args()

    # Validate input
    if not args.input_dir.exists():
        print(f"Error: Input directory '{args.input_dir}' does not exist")
        sys.exit(1)
    if not args.input_dir.is_dir():
        print(f"Error: '{args.input_dir}' is not a directory")
        sys.exit(1)

    # Find PDF files. The suffix is compared case-insensitively so "Paper.PDF"
    # is picked up on case-sensitive filesystems too; directories that merely
    # end in ".pdf" are skipped; sorting makes the processing order stable.
    candidates = args.input_dir.rglob("*") if args.recursive else args.input_dir.iterdir()
    pdf_files = sorted(
        path for path in candidates
        if path.is_file() and path.suffix.lower() == ".pdf"
    )

    if not pdf_files:
        print("No PDF files found")
        sys.exit(1)

    print(f"Found {len(pdf_files)} PDF file(s)")

    # Create MarkItDown instance
    md = MarkItDown()

    # Convert all papers; failures are recorded too so they show up in the catalog
    results = []
    success_count = 0
    for pdf_file in pdf_files:
        success, metadata = convert_paper(
            md,
            pdf_file,
            args.output_dir,
            args.organize_by_year
        )
        if success:
            success_count += 1
        results.append(metadata)

    # Create index if requested
    if args.create_index and results:
        create_index(results, args.output_dir)

    # Print summary
    total = len(pdf_files)
    print("\n" + "="*50)
    print("CONVERSION SUMMARY")
    print("="*50)
    print(f"Total papers: {total}")
    print(f"Successful: {success_count}")
    print(f"Failed: {total - success_count}")
    print(f"Success rate: {success_count/total*100:.1f}%")

    sys.exit(0 if success_count == total else 1)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,243 @@
#!/usr/bin/env python3
"""
Convert documents to Markdown with AI-enhanced image descriptions.
This script demonstrates how to use MarkItDown with OpenRouter to generate
detailed descriptions of images in documents (PowerPoint, PDFs with images, etc.)
"""
import argparse
import os
import sys
from pathlib import Path
from markitdown import MarkItDown
from openai import OpenAI
# Predefined image-description prompts, keyed by use case.
# The keys are the accepted values of the --prompt-type option, and
# convert_with_ai() falls back to the 'general' entry for an unknown key.
# Each value is a multi-line instruction string with surrounding whitespace
# stripped.
PROMPTS = {
'scientific': """
Analyze this scientific image or diagram. Provide:
1. Type of visualization (graph, chart, microscopy, diagram, etc.)
2. Key data points, trends, or patterns
3. Axes labels, legends, and scales
4. Notable features or findings
5. Scientific context and significance
Be precise, technical, and detailed.
""".strip(),
'presentation': """
Describe this presentation slide image. Include:
1. Main visual elements and their arrangement
2. Key points or messages conveyed
3. Data or information presented
4. Visual hierarchy and emphasis
Keep the description clear and informative.
""".strip(),
'general': """
Describe this image in detail. Include:
1. Main subjects and objects
2. Visual composition and layout
3. Text content (if any)
4. Notable details
5. Overall context and purpose
Be comprehensive and accurate.
""".strip(),
'data_viz': """
Analyze this data visualization. Provide:
1. Type of chart/graph (bar, line, scatter, pie, etc.)
2. Variables and axes
3. Data ranges and scales
4. Key patterns, trends, or outliers
5. Statistical insights
Focus on quantitative accuracy.
""".strip(),
'medical': """
Describe this medical image. Include:
1. Type of medical imaging (X-ray, MRI, CT, microscopy, etc.)
2. Anatomical structures visible
3. Notable findings or abnormalities
4. Image quality and contrast
5. Clinical relevance
Be professional and precise.
""".strip()
}
def convert_with_ai(
    input_file: Path,
    output_file: Path,
    api_key: str,
    model: str = "anthropic/claude-sonnet-4.5",
    prompt_type: str = "general",
    custom_prompt: str = None
) -> bool:
    """
    Convert one file to Markdown, using an LLM to describe embedded images.

    The output is the converted text preceded by a short header naming the
    source file, its format, the model and the prompt type.

    Args:
        input_file: Path to input file
        output_file: Path to output Markdown file (parent directories are created)
        api_key: OpenRouter API key
        model: Model name (default: anthropic/claude-sonnet-4.5)
        prompt_type: Key into PROMPTS; unknown keys fall back to 'general'
        custom_prompt: Custom prompt (overrides prompt_type)

    Returns:
        True if successful, False otherwise (the error is printed to stderr)
    """
    try:
        # OpenRouter speaks the OpenAI protocol, so the OpenAI client is
        # simply pointed at its base URL.
        llm = OpenAI(
            api_key=api_key,
            base_url="https://openrouter.ai/api/v1"
        )

        # A custom prompt wins over any predefined one
        prompt = custom_prompt if custom_prompt else PROMPTS.get(prompt_type, PROMPTS['general'])
        prompt_label = 'custom' if custom_prompt else prompt_type

        print(f"Using model: {model}")
        print(f"Prompt type: {prompt_label}")
        print(f"Converting: {input_file}")

        converter = MarkItDown(
            llm_client=llm,
            llm_model=model,
            llm_prompt=prompt
        )
        converted = converter.convert(str(input_file))

        # Metadata header followed by the converted body
        heading = converted.title or input_file.stem
        document = "".join([
            f"# {heading}\n\n",
            f"**Source**: {input_file.name}\n",
            f"**Format**: {input_file.suffix}\n",
            f"**AI Model**: {model}\n",
            f"**Prompt Type**: {prompt_label}\n\n",
            "---\n\n",
        ])
        document += converted.text_content

        output_file.parent.mkdir(parents=True, exist_ok=True)
        output_file.write_text(document, encoding='utf-8')
    except Exception as e:
        print(f"✗ Error: {str(e)}", file=sys.stderr)
        return False

    print(f"✓ Successfully converted to: {output_file}")
    return True
class _ListPromptsAction(argparse.Action):
    """Print every predefined prompt and exit, the way --help does.

    Running as a parse-time action lets --list-prompts be used on its own,
    without supplying the otherwise required input/output arguments.
    """

    def __init__(self, option_strings, dest, default=False, help=None):
        super().__init__(
            option_strings=option_strings,
            dest=dest,
            nargs=0,
            default=default,
            help=help,
        )

    def __call__(self, parser, namespace, values, option_string=None):
        print("Available prompt types:\n")
        for name, prompt in PROMPTS.items():
            print(f"[{name}]")
            print(prompt)
            print("\n" + "="*60 + "\n")
        parser.exit(0)


def main():
    """
    Command-line entry point: convert one document with AI image descriptions.

    Exits with status 0 on success and 1 when the API key is missing, the
    input is not an existing file, or the conversion fails.
    """
    parser = argparse.ArgumentParser(
        description="Convert documents to Markdown with AI-enhanced image descriptions",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Available prompt types:
scientific - For scientific diagrams, graphs, and charts
presentation - For presentation slides
general - General-purpose image description
data_viz - For data visualizations and charts
medical - For medical imaging
Examples:
# Convert a scientific paper
python convert_with_ai.py paper.pdf output.md --prompt-type scientific
# Convert a presentation with custom model
python convert_with_ai.py slides.pptx slides.md --model anthropic/claude-sonnet-4.5 --prompt-type presentation
# Use custom prompt with advanced vision model
python convert_with_ai.py diagram.png diagram.md --model anthropic/claude-sonnet-4.5 --custom-prompt "Describe this technical diagram"
# Set API key via environment variable
export OPENROUTER_API_KEY="sk-or-v1-..."
python convert_with_ai.py image.jpg image.md
Environment Variables:
OPENROUTER_API_KEY OpenRouter API key (required if not passed via --api-key)
Popular Models (use with --model):
anthropic/claude-sonnet-4.5 - Recommended for scientific vision
anthropic/claude-opus-4.5 - Advanced vision model
openai/gpt-4o - GPT-4 Omni (vision support)
openai/gpt-4-vision - GPT-4 Vision
google/gemini-pro-vision - Gemini Pro Vision
"""
    )

    parser.add_argument('input', type=Path, help='Input file')
    parser.add_argument('output', type=Path, help='Output Markdown file')
    parser.add_argument(
        '--api-key', '-k',
        help='OpenRouter API key (or set OPENROUTER_API_KEY env var)'
    )
    parser.add_argument(
        '--model', '-m',
        default='anthropic/claude-sonnet-4.5',
        help='Model to use via OpenRouter (default: anthropic/claude-sonnet-4.5)'
    )
    parser.add_argument(
        '--prompt-type', '-t',
        choices=list(PROMPTS.keys()),
        default='general',
        help='Type of prompt to use (default: general)'
    )
    parser.add_argument(
        '--custom-prompt', '-p',
        help='Custom prompt (overrides --prompt-type)'
    )
    # Handled while parsing, so it works without the positional arguments.
    parser.add_argument(
        '--list-prompts', '-l',
        action=_ListPromptsAction,
        help='List available prompt types and exit'
    )

    args = parser.parse_args()

    # Get API key: the command-line value wins over the environment
    api_key = args.api_key or os.environ.get('OPENROUTER_API_KEY')
    if not api_key:
        print("Error: OpenRouter API key required. Set OPENROUTER_API_KEY environment variable or use --api-key")
        print("Get your API key at: https://openrouter.ai/keys")
        sys.exit(1)

    # Validate input file
    if not args.input.exists():
        print(f"Error: Input file '{args.input}' does not exist")
        sys.exit(1)
    if not args.input.is_file():
        print(f"Error: '{args.input}' is not a file")
        sys.exit(1)

    # Convert file
    success = convert_with_ai(
        input_file=args.input,
        output_file=args.output,
        api_key=api_key,
        model=args.model,
        prompt_type=args.prompt_type,
        custom_prompt=args.custom_prompt
    )

    sys.exit(0 if success else 1)


if __name__ == '__main__':
    main()