Added parallel-web skill

Refactor research lookup skill to enhance backend routing and update documentation. The skill now intelligently selects between the Parallel Chat API and Perplexity sonar-pro-search based on query type. Added compatibility notes, license information, and improved descriptions for clarity. Removed outdated example scripts to streamline the codebase.
This commit is contained in:
Vinayak Agarwal
2026-03-01 07:36:19 -08:00
parent 29c869326e
commit f72b7f4521
13 changed files with 3969 additions and 769 deletions

View File

@@ -1,174 +0,0 @@
#!/usr/bin/env python3
"""
Example usage of the Research Lookup skill with automatic model selection.
This script demonstrates:
1. Automatic model selection based on query complexity
2. Manual model override options
3. Batch query processing
4. Integration with scientific writing workflows
"""
import os
from research_lookup import ResearchLookup
def example_automatic_selection():
    """Demonstrate automatic model selection."""
    banner = "=" * 80
    print(banner)
    print("EXAMPLE 1: Automatic Model Selection")
    print(banner)
    print()
    research = ResearchLookup()
    # One simple lookup and one analytical query, showing how each routes.
    demos = (
        ("Recent advances in CRISPR gene editing 2024",
         "Sonar Pro Search (fast lookup)"),
        ("Compare and contrast the efficacy of mRNA vaccines versus traditional vaccines",
         "Sonar Reasoning Pro (analytical)"),
    )
    for question, expected_label in demos:
        print(f"Query: {question}")
        print(f"Expected model: {expected_label}")
        outcome = research.lookup(question)
        print(f"Actual model: {outcome.get('model')}")
        print()
def example_manual_override():
    """Demonstrate manual model override."""
    divider = "=" * 80
    print(divider)
    print("EXAMPLE 2: Manual Model Override")
    print(divider)
    print()
    # Budget-constrained rapid lookup: pin the cheaper model.
    forced_pro = ResearchLookup(force_model='pro')
    query = "Explain the mechanism of CRISPR-Cas9"
    print(f"Query: {query}")
    print(f"Forced model: Sonar Pro Search")
    outcome = forced_pro.lookup(query)
    print(f"Model used: {outcome.get('model')}")
    print()
    # Critical analysis: pin the reasoning model for the same query.
    forced_reasoning = ResearchLookup(force_model='reasoning')
    print(f"Query: {query}")
    print(f"Forced model: Sonar Reasoning Pro")
    outcome = forced_reasoning.lookup(query)
    print(f"Model used: {outcome.get('model')}")
    print()
def example_batch_queries():
    """Demonstrate batch query processing."""
    rule = "=" * 80
    print(rule)
    print("EXAMPLE 3: Batch Query Processing")
    print(rule)
    print()
    research = ResearchLookup()
    # Mix of simple and complex queries; each routes independently.
    queries = [
        "Recent clinical trials for Alzheimer's disease",  # Sonar Pro Search
        "Compare deep learning vs traditional ML in drug discovery",  # Sonar Reasoning Pro
        "Statistical power analysis methods",  # Sonar Pro Search
    ]
    print("Processing batch queries...")
    print("Each query will automatically select the appropriate model")
    print()
    results = research.batch_lookup(queries, delay=1.0)
    for position, outcome in enumerate(results, start=1):
        print(f"Query {position}: {outcome['query'][:50]}...")
        print(f" Model: {outcome.get('model')}")
        print(f" Type: {outcome.get('model_type')}")
        print()
def example_scientific_writing_workflow():
    """Demonstrate integration with scientific writing workflow."""
    rule = "=" * 80
    print(rule)
    print("EXAMPLE 4: Scientific Writing Workflow")
    print(rule)
    print()
    research = ResearchLookup()
    # Phase 1: breadth-first literature review (these would auto-route to
    # Sonar Pro Search).
    print("PHASE 1: Literature Review (Breadth)")
    for topic in (
        "Recent papers on machine learning in genomics 2024",
        "Clinical applications of AI in radiology",
        "RNA sequencing analysis methods",
    ):
        print(f" - {topic}")
    print()
    # Phase 2: synthesis and analysis (these would auto-route to
    # Sonar Reasoning Pro).
    print("PHASE 2: Discussion (Synthesis & Analysis)")
    for topic in (
        "Compare the advantages and limitations of different ML approaches in genomics",
        "Explain the relationship between model interpretability and clinical adoption",
        "Analyze the ethical implications of AI in medical diagnosis",
    ):
        print(f" - {topic}")
    print()
def main():
    """Run all examples (requires OPENROUTER_API_KEY to be set).

    Live examples are left commented out so the script runs without spending
    API credits; the complexity-assessment demo below works entirely offline.
    """
    if not os.getenv("OPENROUTER_API_KEY"):
        print("Note: Set OPENROUTER_API_KEY environment variable to run live queries")
        print("These examples show the structure without making actual API calls")
        print()
    # Uncomment to run examples (requires API key)
    # example_automatic_selection()
    # example_manual_override()
    # example_batch_queries()
    # example_scientific_writing_workflow()

    # Show complexity assessment without API calls
    print("=" * 80)
    print("COMPLEXITY ASSESSMENT EXAMPLES (No API calls required)")
    print("=" * 80)
    print()
    # A dummy key satisfies the constructor's env-var check; no requests
    # are made because we only call the local complexity heuristic.
    os.environ.setdefault("OPENROUTER_API_KEY", "test")
    research = ResearchLookup()
    test_queries = [
        ("Recent CRISPR studies", "pro"),
        ("Compare CRISPR vs TALENs", "reasoning"),
        ("Explain how CRISPR works", "reasoning"),
        ("Western blot protocol", "pro"),
        ("Pros and cons of different sequencing methods", "reasoning"),
    ]
    for query, expected in test_queries:
        complexity = research._assess_query_complexity(query)
        model_name = "Sonar Reasoning Pro" if complexity == "reasoning" else "Sonar Pro Search"
        # BUG FIX: both branches of this ternary were the empty string (the
        # pass/fail markers were lost), so every line printed identically
        # regardless of whether the heuristic matched the expectation.
        status = "✅" if complexity == expected else "❌"
        print(f"{status} '{query}'")
        print(f"{model_name}")
        print()


if __name__ == "__main__":
    main()

View File

@@ -1,187 +0,0 @@
#!/usr/bin/env python3
"""
Research Lookup Tool for Claude Code
Performs research queries using Perplexity Sonar Pro Search via OpenRouter.
"""
import os
import sys
import json
from typing import Dict, List, Optional
# Import the main research lookup class
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'scripts'))
from research_lookup import ResearchLookup
def format_response(result: Dict) -> str:
    """Format a research lookup result as markdown for Claude Code display.

    Args:
        result: Result dict from ResearchLookup.lookup(). Must contain
            "success"; on success also "response", "citations", "query",
            "model", "timestamp", and optionally "sources" and "usage".

    Returns:
        A markdown-formatted string, or a one-line error message when
        result["success"] is falsy.
    """
    if not result["success"]:
        return f"❌ Research lookup failed: {result['error']}"
    response = result["response"]
    citations = result["citations"]
    # "sources" holds API-provided search results; may be absent.
    sources = result.get("sources", [])
    # Format the output for Claude Code
    output = f"""🔍 **Research Results**
**Query:** {result['query']}
**Model:** {result['model']}
**Timestamp:** {result['timestamp']}
**Note:** Results prioritized by citation count, venue prestige, and author reputation
---
{response}
"""
    # Display API-provided sources with venue/citation info
    if sources:
        output += f"\n📚 **Sources ({len(sources)}):**\n"
        output += "_Prioritized by venue quality and citation impact_\n\n"
        for i, source in enumerate(sources, 1):
            title = source.get("title", "Untitled")
            url = source.get("url", "")
            date = source.get("date", "")
            snippet = source.get("snippet", "")
            # Format source entry with available metadata
            date_str = f" ({date})" if date else ""
            output += f"{i}. **{title}**{date_str}\n"
            # Add venue indicator if detectable from URL
            venue_indicator = _detect_venue_tier(url)
            if venue_indicator:
                output += f" 📊 Venue: {venue_indicator}\n"
            if url:
                output += f" 🔗 {url}\n"
            if snippet:
                # Trim long snippets to 150 chars with a trailing ellipsis.
                output += f" _{snippet[:150]}{'...' if len(snippet) > 150 else ''}_\n"
            output += "\n"
    # Display extracted citations (DOIs, etc.)
    if citations:
        doi_citations = [c for c in citations if c.get("type") == "doi"]
        url_citations = [c for c in citations if c.get("type") == "url"]
        if doi_citations:
            output += f"\n🔗 **DOI References ({len(doi_citations)}):**\n"
            for i, citation in enumerate(doi_citations, 1):
                output += f"{i}. DOI: {citation.get('doi', '')}{citation.get('url', '')}\n"
        if url_citations:
            output += f"\n🌐 **Additional URLs ({len(url_citations)}):**\n"
            for i, citation in enumerate(url_citations, 1):
                url = citation.get('url', '')
                venue = _detect_venue_tier(url)
                venue_str = f" [{venue}]" if venue else ""
                output += f"{i}. {url}{venue_str}\n"
    # Token usage footer, when the API reported it.
    if result.get("usage"):
        usage = result["usage"]
        output += f"\n**Usage:** {usage.get('total_tokens', 'N/A')} tokens"
    return output
def _detect_venue_tier(url: str) -> Optional[str]:
    """Detect venue tier from URL to indicate source quality.

    Scans the lowercased URL for known publisher domains, checking premier
    venues first so a higher-tier label always wins. Returns the tier label
    for the first matching domain, or None when no domain is recognized.
    """
    if not url:
        return None
    haystack = url.lower()
    tier_tables = (
        # Tier 1 - Premier venues
        {
            "nature.com": "Nature (Tier 1)",
            "science.org": "Science (Tier 1)",
            "cell.com": "Cell Press (Tier 1)",
            "nejm.org": "NEJM (Tier 1)",
            "thelancet.com": "Lancet (Tier 1)",
            "jamanetwork.com": "JAMA (Tier 1)",
            "pnas.org": "PNAS (Tier 1)",
        },
        # Tier 2 - High-impact specialized
        {
            "neurips.cc": "NeurIPS (Tier 2 - Top ML)",
            "icml.cc": "ICML (Tier 2 - Top ML)",
            "openreview.net": "Top ML Conference (Tier 2)",
            "aacrjournals.org": "AACR Journals (Tier 2)",
            "ahajournals.org": "AHA Journals (Tier 2)",
            "bloodjournal.org": "Blood (Tier 2)",
            "jci.org": "JCI (Tier 2)",
        },
        # Tier 3 - Respected academic sources
        {
            "springer.com": "Springer",
            "wiley.com": "Wiley",
            "elsevier.com": "Elsevier",
            "oup.com": "Oxford University Press",
            "arxiv.org": "arXiv (Preprint)",
            "biorxiv.org": "bioRxiv (Preprint)",
            "medrxiv.org": "medRxiv (Preprint)",
            "pubmed": "PubMed",
            "ncbi.nlm.nih.gov": "NCBI/PubMed",
            "ieee.org": "IEEE",
            "acm.org": "ACM",
        },
    )
    for table in tier_tables:
        for domain, label in table.items():
            if domain in haystack:
                return label
    return None
def main():
    """Main entry point for Claude Code tool.

    Reads the query from argv, runs the lookup, prints the formatted
    result, and returns a process exit code (0 on success, 1 on failure).
    """
    # Refuse to run without credentials.
    if not os.getenv("OPENROUTER_API_KEY"):
        print("❌ Error: OPENROUTER_API_KEY environment variable not set")
        print("Please set it in your .env file or export it:")
        print(" export OPENROUTER_API_KEY='your_openrouter_api_key'")
        return 1
    # Everything after the script name is the query.
    if len(sys.argv) < 2:
        print("❌ Error: No query provided")
        print("Usage: python lookup.py 'your research query here'")
        return 1
    query = " ".join(sys.argv[1:])
    try:
        research = ResearchLookup()
        print(f"🔍 Researching: {query}")
        result = research.lookup(query)
        print(format_response(result))
        return 0 if result["success"] else 1
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return 1


if __name__ == "__main__":
    exit(main())

View File

@@ -1,208 +1,269 @@
#!/usr/bin/env python3
"""
Research Information Lookup Tool
Uses Perplexity's Sonar Pro Search model through OpenRouter for academic research queries.
Routes research queries to the best backend:
- Parallel Chat API (core model): Default for all general research queries
- Perplexity sonar-pro-search (via OpenRouter): Academic-specific paper searches
Environment variables:
PARALLEL_API_KEY - Required for Parallel Chat API (primary backend)
OPENROUTER_API_KEY - Required for Perplexity academic searches (fallback)
"""
import os
import sys
import json
import requests
import re
import time
import requests
from datetime import datetime
from typing import Dict, List, Optional, Any
from urllib.parse import quote
from typing import Any, Dict, List, Optional
class ResearchLookup:
"""Research information lookup using Perplexity Sonar models via OpenRouter."""
"""Research information lookup with intelligent backend routing.
# Available models
MODELS = {
"pro": "perplexity/sonar-pro", # Fast lookup, cost-effective
"reasoning": "perplexity/sonar-reasoning-pro", # Deep analysis with reasoning
}
Routes queries to the Parallel Chat API (default) or Perplexity
sonar-pro-search (academic paper searches only).
"""
# Keywords that indicate complex queries requiring reasoning model
REASONING_KEYWORDS = [
"compare", "contrast", "analyze", "analysis", "evaluate", "critique",
"versus", "vs", "vs.", "compared to", "differences between", "similarities",
"meta-analysis", "systematic review", "synthesis", "integrate",
"mechanism", "why", "how does", "how do", "explain", "relationship",
"theoretical framework", "implications", "interpret", "reasoning",
"controversy", "conflicting", "paradox", "debate", "reconcile",
"pros and cons", "advantages and disadvantages", "trade-off", "tradeoff",
ACADEMIC_KEYWORDS = [
"find papers", "find paper", "find articles", "find article",
"cite ", "citation", "citations for",
"doi ", "doi:", "pubmed", "pmid",
"journal article", "peer-reviewed",
"systematic review", "meta-analysis",
"literature search", "literature on",
"academic papers", "academic paper",
"research papers on", "research paper on",
"published studies", "published study",
"scholarly", "scholar",
"arxiv", "preprint",
"foundational papers", "seminal papers", "landmark papers",
"highly cited", "most cited",
]
def __init__(self, force_model: Optional[str] = None):
"""
Initialize the research lookup tool.
Args:
force_model: Optional model override ('pro' or 'reasoning').
If None, model is auto-selected based on query complexity.
"""
self.api_key = os.getenv("OPENROUTER_API_KEY")
if not self.api_key:
raise ValueError("OPENROUTER_API_KEY environment variable not set")
PARALLEL_SYSTEM_PROMPT = (
"You are a deep research analyst. Provide a comprehensive, well-cited "
"research report on the user's topic. Include:\n"
"- Key findings with specific data, statistics, and quantitative evidence\n"
"- Detailed analysis organized by themes\n"
"- Multiple authoritative sources cited inline\n"
"- Methodologies and implications where relevant\n"
"- Future outlook and research gaps\n"
"Use markdown formatting with clear section headers. "
"Prioritize authoritative and recent sources."
)
self.base_url = "https://openrouter.ai/api/v1"
self.force_model = force_model
self.headers = {
"Authorization": f"Bearer {self.api_key}",
CHAT_BASE_URL = "https://api.parallel.ai"
def __init__(self, force_backend: Optional[str] = None):
    """Initialize the research lookup tool.

    Args:
        force_backend: Force a specific backend ('parallel' or 'perplexity').
            If None, backend is auto-selected based on query content.

    Raises:
        ValueError: When neither backend has an API key configured.
    """
    self.force_backend = force_backend
    # Backend availability is determined solely by which keys are set.
    self.parallel_available = bool(os.getenv("PARALLEL_API_KEY"))
    self.perplexity_available = bool(os.getenv("OPENROUTER_API_KEY"))
    if not (self.parallel_available or self.perplexity_available):
        raise ValueError(
            "No API keys found. Set at least one of:\n"
            " PARALLEL_API_KEY (for Parallel Chat API - primary)\n"
            " OPENROUTER_API_KEY (for Perplexity academic search - fallback)"
        )
def _select_backend(self, query: str) -> str:
    """Select the best backend for a query.

    Precedence: an explicit override (honored only when its key is set),
    then Perplexity for academic-flavored queries, then whichever backend
    has credentials. Raises ValueError when nothing is available.
    """
    # Explicit override wins, but only when its API key is configured.
    if self.force_backend:
        if self.force_backend == "perplexity" and self.perplexity_available:
            return "perplexity"
        if self.force_backend == "parallel" and self.parallel_available:
            return "parallel"
    # Academic-style queries (paper/DOI/citation searches) prefer Perplexity.
    lowered = query.lower()
    if any(marker in lowered for marker in self.ACADEMIC_KEYWORDS):
        if self.perplexity_available:
            return "perplexity"
    # Otherwise fall back to whichever backend is configured.
    if self.parallel_available:
        return "parallel"
    if self.perplexity_available:
        return "perplexity"
    raise ValueError("No backend available. Check API keys.")
# ------------------------------------------------------------------
# Parallel Chat API backend
# ------------------------------------------------------------------
def _get_chat_client(self):
    """Return a cached OpenAI client configured for the Parallel Chat API.

    The client is constructed on first use so the 'openai' dependency is
    only required when the Parallel backend is actually exercised.
    """
    cached = getattr(self, "_chat_client", None)
    if cached is not None:
        return cached
    try:
        from openai import OpenAI
    except ImportError:
        raise ImportError(
            "The 'openai' package is required for Parallel Chat API.\n"
            "Install it with: pip install openai"
        )
    self._chat_client = OpenAI(
        api_key=os.getenv("PARALLEL_API_KEY"),
        base_url=self.CHAT_BASE_URL,
    )
    return self._chat_client
def _parallel_lookup(self, query: str) -> Dict[str, Any]:
    """Run research via the Parallel Chat API (core model).

    Args:
        query: The research question to submit.

    Returns:
        A result dict with "success", "query", "timestamp", "backend", and
        "model" always present; on success also "response", "citations",
        and "sources", on failure an "error" message instead.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    model = "core"
    try:
        client = self._get_chat_client()
        # Progress note goes to stderr so stdout stays clean for results.
        print(f"[Research] Parallel Chat API (model={model})...", file=sys.stderr)
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": self.PARALLEL_SYSTEM_PROMPT},
                {"role": "user", "content": query},
            ],
            stream=False,
        )
        # Defensive extraction: tolerate an empty choices list or a None
        # message content rather than raising.
        content = ""
        if response.choices and len(response.choices) > 0:
            content = response.choices[0].message.content or ""
        # API-provided citations (from the response basis) are treated as
        # authoritative "sources"; text-scraped ones are appended as fallback.
        api_citations = self._extract_basis_citations(response)
        text_citations = self._extract_citations_from_text(content)
        return {
            "success": True,
            "query": query,
            "response": content,
            "citations": api_citations + text_citations,
            "sources": api_citations,
            "timestamp": timestamp,
            "backend": "parallel",
            "model": f"parallel-chat/{model}",
        }
    except Exception as e:
        # Broad catch: any SDK/network failure becomes a structured error
        # result rather than an exception propagating to the caller.
        return {
            "success": False,
            "query": query,
            "error": str(e),
            "timestamp": timestamp,
            "backend": "parallel",
            "model": f"parallel-chat/{model}",
        }
def _extract_basis_citations(self, response) -> List[Dict[str, str]]:
    """Extract citation sources from the Chat API research basis.

    Basis entries and their citations may be plain dicts or SDK objects;
    both access styles are supported. Duplicate URLs are skipped.
    """
    collected: List[Dict[str, str]] = []
    basis = getattr(response, "basis", None)
    if not basis or not isinstance(basis, list):
        return collected
    seen: set = set()
    for entry in basis:
        if isinstance(entry, dict):
            entry_citations = entry.get("citations", [])
        else:
            entry_citations = getattr(entry, "citations", None) or []
        for cit in entry_citations:
            if isinstance(cit, dict):
                url = cit.get("url", "")
                title = cit.get("title", "")
                excerpts = cit.get("excerpts", [])
            else:
                url = getattr(cit, "url", "")
                title = getattr(cit, "title", "")
                excerpts = getattr(cit, "excerpts", [])
            if not url or url in seen:
                continue
            seen.add(url)
            collected.append({
                "type": "source",
                "url": url,
                "title": title,
                "excerpts": excerpts,
            })
    return collected
# ------------------------------------------------------------------
# Perplexity academic search backend
# ------------------------------------------------------------------
def _perplexity_lookup(self, query: str) -> Dict[str, Any]:
"""Run academic search via Perplexity sonar-pro-search through OpenRouter."""
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
api_key = os.getenv("OPENROUTER_API_KEY")
model = "perplexity/sonar-pro-search"
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
"HTTP-Referer": "https://scientific-writer.local",
"X-Title": "Scientific Writer Research Tool"
"X-Title": "Scientific Writer Research Tool",
}
def _select_model(self, query: str) -> str:
"""
Select the appropriate model based on query complexity.
Args:
query: The research query
Returns:
Model identifier string
"""
if self.force_model:
return self.MODELS.get(self.force_model, self.MODELS["reasoning"])
# Check for reasoning keywords (case-insensitive)
query_lower = query.lower()
for keyword in self.REASONING_KEYWORDS:
if keyword in query_lower:
return self.MODELS["reasoning"]
# Check for multiple questions or complex structure
question_count = query.count("?")
if question_count >= 2:
return self.MODELS["reasoning"]
# Check for very long queries (likely complex)
if len(query) > 200:
return self.MODELS["reasoning"]
# Default to pro for simple lookups
return self.MODELS["pro"]
research_prompt = self._format_academic_prompt(query)
messages = [
{
"role": "system",
"content": (
"You are an academic research assistant specializing in finding "
"HIGH-IMPACT, INFLUENTIAL research.\n\n"
"QUALITY PRIORITIZATION (CRITICAL):\n"
"- ALWAYS prefer highly-cited papers over obscure publications\n"
"- ALWAYS prioritize Tier-1 venues: Nature, Science, Cell, NEJM, Lancet, JAMA, PNAS\n"
"- ALWAYS prefer papers from established researchers\n"
"- Include citation counts when known (e.g., 'cited 500+ times')\n"
"- Quality matters more than quantity\n\n"
"VENUE HIERARCHY:\n"
"1. Nature/Science/Cell family, NEJM, Lancet, JAMA (highest)\n"
"2. High-impact specialized journals (IF>10), top conferences (NeurIPS, ICML, ICLR)\n"
"3. Respected field-specific journals (IF 5-10)\n"
"4. Other peer-reviewed sources (only if no better option)\n\n"
"Focus exclusively on scholarly sources. Prioritize recent literature (2020-2026) "
"and provide complete citations with DOIs."
),
},
{"role": "user", "content": research_prompt},
]
def _make_request(self, messages: List[Dict[str, str]], model: str, **kwargs) -> Dict[str, Any]:
"""Make a request to the OpenRouter API with academic search mode."""
data = {
"model": model,
"messages": messages,
"max_tokens": 8000,
"temperature": 0.1, # Low temperature for factual research
# Perplexity-specific parameters for academic search
"search_mode": "academic", # Prioritize scholarly sources (peer-reviewed papers, journals)
"search_context_size": "high", # Always use high context for deeper research
**kwargs
"temperature": 0.1,
"search_mode": "academic",
"search_context_size": "high",
}
try:
response = requests.post(
f"{self.base_url}/chat/completions",
headers=self.headers,
"https://openrouter.ai/api/v1/chat/completions",
headers=headers,
json=data,
timeout=90 # Increased timeout for academic search
timeout=90,
)
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
raise Exception(f"API request failed: {str(e)}")
resp_json = response.json()
def _format_research_prompt(self, query: str) -> str:
"""Format the query for optimal research results."""
return f"""You are an expert research assistant. Please provide comprehensive, accurate research information for the following query: "{query}"
IMPORTANT INSTRUCTIONS:
1. Focus on ACADEMIC and SCIENTIFIC sources (peer-reviewed papers, reputable journals, institutional research)
2. Include RECENT information (prioritize 2020-2026 publications)
3. Provide COMPLETE citations with authors, title, journal/conference, year, and DOI when available
4. Structure your response with clear sections and proper attribution
5. Be comprehensive but concise - aim for 800-1200 words
6. Include key findings, methodologies, and implications when relevant
7. Note any controversies, limitations, or conflicting evidence
PAPER QUALITY AND POPULARITY PRIORITIZATION (CRITICAL):
8. ALWAYS prioritize HIGHLY-CITED papers over obscure publications:
- Recent papers (0-3 years): prefer 20+ citations, highlight 100+ as highly influential
- Mid-age papers (3-7 years): prefer 100+ citations, highlight 500+ as landmark
- Older papers (7+ years): prefer 500+ citations, highlight 1000+ as foundational
9. ALWAYS prioritize papers from TOP-TIER VENUES:
- Tier 1 (highest priority): Nature, Science, Cell, NEJM, Lancet, JAMA, PNAS, Nature Medicine, Nature Biotechnology
- Tier 2 (high priority): High-impact specialized journals (IF>10), top conferences (NeurIPS, ICML, ICLR for AI/ML)
- Tier 3: Respected specialized journals (IF 5-10)
- Only cite lower-tier venues if directly relevant AND no better source exists
10. PREFER papers from ESTABLISHED, REPUTABLE AUTHORS:
- Senior researchers with high h-index and multiple high-impact publications
- Leading research groups at recognized institutions
- Authors with recognized expertise (awards, editorial positions)
11. For EACH citation, include when available:
- Approximate citation count (e.g., "cited 500+ times")
- Journal/venue tier indicator
- Notable author credentials if relevant
12. PRIORITIZE papers that DIRECTLY address the research question over tangentially related work
RESPONSE FORMAT:
- Start with a brief summary (2-3 sentences)
- Present key findings and studies in organized sections
- Rank papers by impact: most influential/cited first
- End with future directions or research gaps if applicable
- Include 5-8 high-quality citations, emphasizing Tier-1 venues and highly-cited papers
Remember: Quality over quantity. Prioritize influential, highly-cited papers from prestigious venues and established researchers."""
def lookup(self, query: str) -> Dict[str, Any]:
"""Perform a research lookup for the given query."""
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Select model based on query complexity
model = self._select_model(query)
# Format the research prompt
research_prompt = self._format_research_prompt(query)
# Prepare messages for the API with system message for academic mode
messages = [
{
"role": "system",
"content": """You are an academic research assistant specializing in finding HIGH-IMPACT, INFLUENTIAL research.
QUALITY PRIORITIZATION (CRITICAL):
- ALWAYS prefer highly-cited papers over obscure publications
- ALWAYS prioritize Tier-1 venues: Nature, Science, Cell, NEJM, Lancet, JAMA, PNAS, and their family journals
- ALWAYS prefer papers from established researchers with strong publication records
- Include citation counts when known (e.g., "cited 500+ times")
- Quality matters more than quantity - 5 excellent papers beats 10 mediocre ones
VENUE HIERARCHY:
1. Nature/Science/Cell family, NEJM, Lancet, JAMA (highest priority)
2. High-impact specialized journals (IF>10), top ML conferences (NeurIPS, ICML, ICLR)
3. Respected field-specific journals (IF 5-10)
4. Other peer-reviewed sources (only if no better option exists)
Focus exclusively on scholarly sources: peer-reviewed journals, academic papers, research institutions. Prioritize recent academic literature (2020-2026) and provide complete citations with DOIs. Always indicate paper impact through citation counts and venue prestige."""
},
{"role": "user", "content": research_prompt}
]
try:
# Make the API request
response = self._make_request(messages, model)
# Extract the response content
if "choices" in response and len(response["choices"]) > 0:
choice = response["choices"][0]
if "choices" in resp_json and len(resp_json["choices"]) > 0:
choice = resp_json["choices"][0]
if "message" in choice and "content" in choice["message"]:
content = choice["message"]["content"]
# Extract citations from API response (Perplexity provides these)
api_citations = self._extract_api_citations(response, choice)
# Also extract citations from text as fallback
api_citations = self._extract_api_citations(resp_json, choice)
text_citations = self._extract_citations_from_text(content)
# Combine: prioritize API citations, add text citations if no duplicates
citations = api_citations + text_citations
return {
@@ -210,10 +271,11 @@ Focus exclusively on scholarly sources: peer-reviewed journals, academic papers,
"query": query,
"response": content,
"citations": citations,
"sources": api_citations, # Separate field for API-provided sources
"sources": api_citations,
"timestamp": timestamp,
"backend": "perplexity",
"model": model,
"usage": response.get("usage", {})
"usage": resp_json.get("usage", {}),
}
else:
raise Exception("Invalid response format from API")
@@ -226,22 +288,54 @@ Focus exclusively on scholarly sources: peer-reviewed journals, academic papers,
"query": query,
"error": str(e),
"timestamp": timestamp,
"model": model
"backend": "perplexity",
"model": model,
}
# ------------------------------------------------------------------
# Shared utilities
# ------------------------------------------------------------------
def _format_academic_prompt(self, query: str) -> str:
    """Format a query for academic research results via Perplexity.

    Wraps the raw query in a prompt that instructs the model to favor
    peer-reviewed, highly-cited, top-tier-venue sources and to return a
    structured, citation-rich answer.

    Args:
        query: The user's research question, embedded verbatim.

    Returns:
        The full prompt string to send as the user message.
    """
    return f"""You are an expert research assistant. Please provide comprehensive, accurate research information for the following query: "{query}"
IMPORTANT INSTRUCTIONS:
1. Focus on ACADEMIC and SCIENTIFIC sources (peer-reviewed papers, reputable journals, institutional research)
2. Include RECENT information (prioritize 2020-2026 publications)
3. Provide COMPLETE citations with authors, title, journal/conference, year, and DOI when available
4. Structure your response with clear sections and proper attribution
5. Be comprehensive but concise - aim for 800-1200 words
6. Include key findings, methodologies, and implications when relevant
7. Note any controversies, limitations, or conflicting evidence
PAPER QUALITY PRIORITIZATION (CRITICAL):
8. ALWAYS prioritize HIGHLY-CITED papers over obscure publications
9. ALWAYS prioritize papers from TOP-TIER VENUES (Nature, Science, Cell, NEJM, Lancet, JAMA, PNAS)
10. PREFER papers from ESTABLISHED, REPUTABLE AUTHORS
11. For EACH citation include when available: citation count, venue tier, author credentials
12. PRIORITIZE papers that DIRECTLY address the research question
RESPONSE FORMAT:
- Start with a brief summary (2-3 sentences)
- Present key findings and studies in organized sections
- Rank papers by impact: most influential/cited first
- End with future directions or research gaps if applicable
- Include 5-8 high-quality citations
Remember: Quality over quantity. Prioritize influential, highly-cited papers from prestigious venues."""
def _extract_api_citations(self, response: Dict[str, Any], choice: Dict[str, Any]) -> List[Dict[str, str]]:
"""Extract citations from Perplexity API response fields."""
citations = []
# Perplexity returns citations in search_results field (new format)
# Check multiple possible locations where OpenRouter might place them
search_results = (
response.get("search_results") or
choice.get("search_results") or
choice.get("message", {}).get("search_results") or
[]
response.get("search_results")
or choice.get("search_results")
or choice.get("message", {}).get("search_results")
or []
)
for result in search_results:
citation = {
"type": "source",
@@ -249,162 +343,164 @@ Focus exclusively on scholarly sources: peer-reviewed journals, academic papers,
"url": result.get("url", ""),
"date": result.get("date", ""),
}
# Add snippet if available (newer API feature)
if result.get("snippet"):
citation["snippet"] = result.get("snippet")
citation["snippet"] = result["snippet"]
citations.append(citation)
# Also check for legacy citations field (backward compatibility)
legacy_citations = (
response.get("citations") or
choice.get("citations") or
choice.get("message", {}).get("citations") or
[]
response.get("citations")
or choice.get("citations")
or choice.get("message", {}).get("citations")
or []
)
for url in legacy_citations:
if isinstance(url, str):
# Legacy format was just URLs
citations.append({
"type": "source",
"url": url,
"title": "",
"date": ""
})
citations.append({"type": "source", "url": url, "title": "", "date": ""})
elif isinstance(url, dict):
citations.append({
"type": "source",
"url": url.get("url", ""),
"title": url.get("title", ""),
"date": url.get("date", "")
"date": url.get("date", ""),
})
return citations
def _extract_citations_from_text(self, text: str) -> List[Dict[str, str]]:
"""Extract potential citations from the response text as fallback."""
import re
"""Extract DOIs and academic URLs from response text as fallback."""
citations = []
# Look for DOI patterns first (most reliable)
# Matches: doi:10.xxx, DOI: 10.xxx, https://doi.org/10.xxx
doi_pattern = r'(?:doi[:\s]*|https?://(?:dx\.)?doi\.org/)(10\.[0-9]{4,}/[^\s\)\]\,\[\<\>]+)'
doi_matches = re.findall(doi_pattern, text, re.IGNORECASE)
seen_dois = set()
for doi in doi_matches:
# Clean up DOI - remove trailing punctuation and brackets
doi_clean = doi.strip().rstrip('.,;:)]')
doi_clean = doi.strip().rstrip(".,;:)]")
if doi_clean and doi_clean not in seen_dois:
seen_dois.add(doi_clean)
citations.append({
"type": "doi",
"doi": doi_clean,
"url": f"https://doi.org/{doi_clean}"
"url": f"https://doi.org/{doi_clean}",
})
# Look for URLs that might be sources
url_pattern = r'https?://[^\s\)\]\,\<\>\"\']+(?:arxiv\.org|pubmed|ncbi\.nlm\.nih\.gov|nature\.com|science\.org|wiley\.com|springer\.com|ieee\.org|acm\.org)[^\s\)\]\,\<\>\"\']*'
url_pattern = (
r'https?://[^\s\)\]\,\<\>\"\']+(?:arxiv\.org|pubmed|ncbi\.nlm\.nih\.gov|'
r'nature\.com|science\.org|wiley\.com|springer\.com|ieee\.org|acm\.org)'
r'[^\s\)\]\,\<\>\"\']*'
)
url_matches = re.findall(url_pattern, text, re.IGNORECASE)
seen_urls = set()
for url in url_matches:
url_clean = url.rstrip('.')
url_clean = url.rstrip(".")
if url_clean not in seen_urls:
seen_urls.add(url_clean)
citations.append({
"type": "url",
"url": url_clean
})
citations.append({"type": "url", "url": url_clean})
return citations
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
def lookup(self, query: str) -> Dict[str, Any]:
    """Perform a research lookup, routing to the best backend.

    Parallel Chat API is used by default. Perplexity sonar-pro-search
    is used only for academic-specific queries (paper searches, DOI
    lookups), as decided by ``_select_backend``.

    Args:
        query: Research question or topic to look up.

    Returns:
        Result dict produced by the selected backend helper.
    """
    backend = self._select_backend(query)
    # Progress goes to stderr so stdout stays clean for piped results.
    print(f"[Research] Backend: {backend} | Query: {query[:80]}...", file=sys.stderr)
    if backend == "parallel":
        return self._parallel_lookup(query)
    return self._perplexity_lookup(query)
def batch_lookup(self, queries: List[str], delay: float = 1.0) -> List[Dict[str, Any]]:
    """Perform multiple research lookups with a delay between requests.

    Args:
        queries: Queries to run, in order.
        delay: Seconds to sleep before each request after the first
            (simple rate limiting; set to 0 to disable).

    Returns:
        One result dict per query, in the same order as ``queries``.
    """
    results = []
    for i, query in enumerate(queries):
        if i > 0 and delay > 0:
            time.sleep(delay)  # rate limiting between backend calls
        result = self.lookup(query)
        results.append(result)
        # Progress on stderr so stdout stays clean for piped results.
        print(f"[Research] Completed query {i+1}/{len(queries)}: {query[:50]}...", file=sys.stderr)
    return results
def get_model_info(self) -> Dict[str, Any]:
    """Get information about available models from OpenRouter.

    Returns:
        Parsed JSON payload from the ``/models`` endpoint, or a dict of
        the form ``{"error": <message>}`` if the request fails.
    """
    endpoint = f"{self.base_url}/models"
    try:
        resp = requests.get(endpoint, headers=self.headers, timeout=30)
        resp.raise_for_status()
        return resp.json()
    except Exception as exc:
        # Surface the failure to the caller instead of raising.
        return {"error": str(exc)}
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
    """Command-line interface for the research lookup tool.

    Returns:
        Process exit code: 0 on success, 1 on configuration or runtime error.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Research Information Lookup Tool (Parallel Chat API + Perplexity)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # General research (uses Parallel Chat API, core model)
  python research_lookup.py "latest advances in quantum computing 2025"

  # Academic paper search (auto-routes to Perplexity)
  python research_lookup.py "find papers on CRISPR gene editing clinical trials"

  # Force a specific backend
  python research_lookup.py "topic" --force-backend parallel
  python research_lookup.py "topic" --force-backend perplexity

  # Save output to file
  python research_lookup.py "topic" -o results.txt

  # JSON output
  python research_lookup.py "topic" --json -o results.json
""",
    )
    parser.add_argument("query", nargs="?", help="Research query to look up")
    parser.add_argument("--batch", nargs="+", help="Run multiple queries")
    parser.add_argument(
        "--force-backend",
        choices=["parallel", "perplexity"],
        help="Force a specific backend (default: auto-select)",
    )
    parser.add_argument("-o", "--output", help="Write output to file")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    # Output destination: a file when -o is given, otherwise stdout.
    output_file = open(args.output, "w", encoding="utf-8") if args.output else None

    def write_output(text):
        """Write one line to the output file or stdout."""
        if output_file:
            output_file.write(text + "\n")
        else:
            print(text)

    def finish(code):
        """Close the output file (if any) and return the exit code."""
        if output_file:
            output_file.close()
        return code

    # At least one backend credential is required.
    has_parallel = bool(os.getenv("PARALLEL_API_KEY"))
    has_perplexity = bool(os.getenv("OPENROUTER_API_KEY"))
    if not has_parallel and not has_perplexity:
        print("Error: No API keys found. Set at least one:", file=sys.stderr)
        print("  export PARALLEL_API_KEY='...'    (primary - Parallel Chat API)", file=sys.stderr)
        print("  export OPENROUTER_API_KEY='...'  (fallback - Perplexity academic)", file=sys.stderr)
        return finish(1)

    if not args.query and not args.batch:
        parser.print_help()
        return finish(1)

    try:
        research = ResearchLookup(force_backend=args.force_backend)

        if args.batch:
            print(f"Running batch research for {len(args.batch)} queries...", file=sys.stderr)
            results = research.batch_lookup(args.batch)
        else:
            print(f"Researching: {args.query}", file=sys.stderr)
            results = [research.lookup(args.query)]

        # JSON output short-circuits the human-readable display.
        # default=str: tolerate non-JSON-native values (e.g. datetimes).
        if args.json:
            write_output(json.dumps(results, indent=2, ensure_ascii=False, default=str))
            return finish(0)

        # Human-readable display, one section per query.
        for i, result in enumerate(results):
            if result["success"]:
                write_output(f"\n{'='*80}")
                write_output(f"Query {i+1}: {result['query']}")
                write_output(f"Timestamp: {result['timestamp']}")
                write_output(f"Backend: {result.get('backend', 'unknown')} | Model: {result.get('model', 'unknown')}")
                write_output(f"{'='*80}")
                write_output(result["response"])

                # API-provided sources first (most reliable).
                sources = result.get("sources", [])
                if sources:
                    write_output(f"\nSources ({len(sources)}):")
                    for j, source in enumerate(sources):
                        title = source.get("title", "Untitled")
                        url = source.get("url", "")
                        write_output(f"  [{j+1}] {title}")
                        if url:
                            write_output(f"      {url}")

                # Additional text-extracted citations as a fallback.
                citations = result.get("citations", [])
                text_citations = [c for c in citations if c.get("type") in ("doi", "url")]
                if text_citations:
                    write_output(f"\nAdditional References ({len(text_citations)}):")
                    for j, citation in enumerate(text_citations):
                        if citation.get("type") == "doi":
                            write_output(f"  [{j+1}] DOI: {citation.get('doi', '')} - {citation.get('url', '')}")
                        else:
                            write_output(f"  [{j+1}] {citation.get('url', '')}")
            else:
                # NOTE(review): failure branch reconstructed from diff residue;
                # result dicts are assumed to carry an 'error' key on failure.
                write_output(f"\nQuery {i+1} failed: {result.get('error', 'unknown error')}")

        return finish(0)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return finish(1)
if __name__ == "__main__":
    # sys.exit propagates main()'s return value as the process exit code.
    sys.exit(main())