mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
Added parallel-web skill
Refactor research lookup skill to enhance backend routing and update documentation. The skill now intelligently selects between the Parallel Chat API and Perplexity sonar-pro-search based on query type. Added compatibility notes, license information, and improved descriptions for clarity. Removed outdated example scripts to streamline the codebase.
This commit is contained in:
@@ -1,174 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Example usage of the Research Lookup skill with automatic model selection.
|
||||
|
||||
This script demonstrates:
|
||||
1. Automatic model selection based on query complexity
|
||||
2. Manual model override options
|
||||
3. Batch query processing
|
||||
4. Integration with scientific writing workflows
|
||||
"""
|
||||
|
||||
import os
|
||||
from research_lookup import ResearchLookup
|
||||
|
||||
|
||||
def example_automatic_selection():
    """Show which model the lookup picks on its own for two contrasting queries.

    For each query the expected model is printed first, then the query is sent
    through ``ResearchLookup.lookup`` and the ``model`` entry of the returned
    result is printed for comparison.
    """
    banner = "=" * 80
    print(banner)
    print("EXAMPLE 1: Automatic Model Selection")
    print(banner)
    print()

    research = ResearchLookup()

    # Each entry pairs a query with the model this example expects to be used.
    cases = (
        # Simple lookup - will use Sonar Pro Search
        (
            "Recent advances in CRISPR gene editing 2024",
            "Sonar Pro Search (fast lookup)",
        ),
        # Complex analysis - will use Sonar Reasoning Pro
        (
            "Compare and contrast the efficacy of mRNA vaccines versus traditional vaccines",
            "Sonar Reasoning Pro (analytical)",
        ),
    )

    for query, expected_model in cases:
        print(f"Query: {query}")
        print(f"Expected model: {expected_model}")
        outcome = research.lookup(query)
        print(f"Actual model: {outcome.get('model')}")
        print()
|
||||
|
||||
|
||||
def example_manual_override():
    """Run one query twice, each time with a different forced model.

    A separate ``ResearchLookup`` is built per run with ``force_model`` set to
    ``'pro'`` and then ``'reasoning'``; the model reported in each result is
    printed.
    """
    banner = "=" * 80
    print(banner)
    print("EXAMPLE 2: Manual Model Override")
    print(banner)
    print()

    query = "Explain the mechanism of CRISPR-Cas9"

    overrides = (
        # Force Sonar Pro Search for budget-constrained rapid lookup
        ("pro", "Sonar Pro Search"),
        # Force Sonar Reasoning Pro for critical analysis
        ("reasoning", "Sonar Reasoning Pro"),
    )

    for force_key, label in overrides:
        forced_lookup = ResearchLookup(force_model=force_key)
        print(f"Query: {query}")
        print(f"Forced model: {label}")
        outcome = forced_lookup.lookup(query)
        print(f"Model used: {outcome.get('model')}")
        print()
|
||||
|
||||
|
||||
def example_batch_queries():
    """Send a mixed list of queries through ``batch_lookup`` and summarize each result.

    For every returned result the first 50 characters of its ``query`` are
    printed together with its ``model`` and ``model_type`` entries.
    """
    banner = "=" * 80
    print(banner)
    print("EXAMPLE 3: Batch Query Processing")
    print(banner)
    print()

    research = ResearchLookup()

    # Mix of simple and complex queries
    queries = [
        "Recent clinical trials for Alzheimer's disease",  # Sonar Pro Search
        "Compare deep learning vs traditional ML in drug discovery",  # Sonar Reasoning Pro
        "Statistical power analysis methods",  # Sonar Pro Search
    ]

    print("Processing batch queries...")
    print("Each query will automatically select the appropriate model")
    print()

    batch = research.batch_lookup(queries, delay=1.0)

    for number, item in enumerate(batch, start=1):
        preview = item['query'][:50]
        print(f"Query {number}: {preview}...")
        print(f" Model: {item.get('model')}")
        print(f" Type: {item.get('model_type')}")
        print()
|
||||
|
||||
|
||||
def example_scientific_writing_workflow():
    """Print the queries a two-phase scientific writing workflow would issue.

    A ``ResearchLookup`` is constructed, but this example only lists the
    queries for each phase; no lookups are performed.
    """
    banner = "=" * 80
    print(banner)
    print("EXAMPLE 4: Scientific Writing Workflow")
    print(banner)
    print()

    research = ResearchLookup()

    phases = (
        # Literature review phase - use Pro for breadth
        # (these will automatically use Sonar Pro Search)
        (
            "PHASE 1: Literature Review (Breadth)",
            [
                "Recent papers on machine learning in genomics 2024",
                "Clinical applications of AI in radiology",
                "RNA sequencing analysis methods",
            ],
        ),
        # Discussion phase - use Reasoning Pro for synthesis
        # (these will automatically use Sonar Reasoning Pro)
        (
            "PHASE 2: Discussion (Synthesis & Analysis)",
            [
                "Compare the advantages and limitations of different ML approaches in genomics",
                "Explain the relationship between model interpretability and clinical adoption",
                "Analyze the ethical implications of AI in medical diagnosis",
            ],
        ),
    )

    for heading, topics in phases:
        print(heading)
        for topic in topics:
            print(f" - {topic}")
        print()
|
||||
|
||||
|
||||
def main():
    """Run the offline complexity-assessment demo (requires no live API calls).

    Prints a hint when OPENROUTER_API_KEY is unset, then makes sure the
    variable has a value (defaulting to "test") before building
    ``ResearchLookup``. For each sample query it prints whether
    ``_assess_query_complexity`` returned the expected label and which model
    that label corresponds to.
    """
    if not os.getenv("OPENROUTER_API_KEY"):
        print("Note: Set OPENROUTER_API_KEY environment variable to run live queries")
        print("These examples show the structure without making actual API calls")
        print()

    # Uncomment to run examples (requires API key)
    # example_automatic_selection()
    # example_manual_override()
    # example_batch_queries()
    # example_scientific_writing_workflow()

    # Show complexity assessment without API calls
    banner = "=" * 80
    print(banner)
    print("COMPLEXITY ASSESSMENT EXAMPLES (No API calls required)")
    print(banner)
    print()

    # A placeholder key lets the constructor run when no real key is set.
    os.environ.setdefault("OPENROUTER_API_KEY", "test")
    research = ResearchLookup()

    # (query, expected complexity label)
    samples = [
        ("Recent CRISPR studies", "pro"),
        ("Compare CRISPR vs TALENs", "reasoning"),
        ("Explain how CRISPR works", "reasoning"),
        ("Western blot protocol", "pro"),
        ("Pros and cons of different sequencing methods", "reasoning"),
    ]

    for query, expected in samples:
        assessed = research._assess_query_complexity(query)

        if assessed == "reasoning":
            model_name = "Sonar Reasoning Pro"
        else:
            model_name = "Sonar Pro Search"

        if assessed == expected:
            status = "✓"
        else:
            status = "✗"

        print(f"{status} '{query}'")
        print(f" → {model_name}")
        print()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -1,187 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Research Lookup Tool for Claude Code
|
||||
Performs research queries using Perplexity Sonar Pro Search via OpenRouter.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
# Import the main research lookup class
|
||||
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'scripts'))
|
||||
from research_lookup import ResearchLookup
|
||||
|
||||
|
||||
def format_response(result: Dict) -> str:
    """Render a research lookup result as Markdown text for display.

    Args:
        result: Result dictionary from a lookup. ``success`` is required. A
            successful result must carry ``response``, ``query``, ``model``
            and ``timestamp``; ``citations``, ``sources`` and ``usage`` are
            optional. A failed result may carry ``error``.

    Returns:
        The formatted report, or a single failure line when ``success`` is
        falsy.
    """
    if not result["success"]:
        return f"❌ Research lookup failed: {result.get('error', 'Unknown error')}"

    response = result["response"]
    # Optional fields: treat a missing key and an explicit None the same way.
    citations = result.get("citations") or []
    sources = result.get("sources") or []

    # Collect fragments and join once at the end instead of repeated "+=".
    parts = [
        "\n".join(
            [
                "🔍 **Research Results**",
                "",
                f"**Query:** {result['query']}",
                f"**Model:** {result['model']}",
                f"**Timestamp:** {result['timestamp']}",
                "**Note:** Results prioritized by citation count, venue prestige, and author reputation",
                "",
                "---",
                "",
                f"{response}",
                "",
                "",
            ]
        )
    ]

    # Display API-provided sources with venue/citation info
    if sources:
        parts.append(f"\n📚 **Sources ({len(sources)}):**\n")
        parts.append("_Prioritized by venue quality and citation impact_\n\n")
        for i, source in enumerate(sources, 1):
            title = source.get("title", "Untitled")
            url = source.get("url", "")
            date = source.get("date", "")
            snippet = source.get("snippet", "")

            # Format source entry with available metadata
            date_str = f" ({date})" if date else ""
            parts.append(f"{i}. **{title}**{date_str}\n")

            # Add venue indicator if detectable from URL
            venue_indicator = _detect_venue_tier(url)
            if venue_indicator:
                parts.append(f" 📊 Venue: {venue_indicator}\n")

            if url:
                parts.append(f" 🔗 {url}\n")
            if snippet:
                # Long snippets are cut at 150 characters and marked with "..."
                ellipsis = "..." if len(snippet) > 150 else ""
                parts.append(f" _{snippet[:150]}{ellipsis}_\n")
            parts.append("\n")

    # Display extracted citations (DOIs, etc.)
    if citations:
        doi_citations = [c for c in citations if c.get("type") == "doi"]
        url_citations = [c for c in citations if c.get("type") == "url"]

        if doi_citations:
            parts.append(f"\n🔗 **DOI References ({len(doi_citations)}):**\n")
            for i, citation in enumerate(doi_citations, 1):
                parts.append(
                    f"{i}. DOI: {citation.get('doi', '')} → {citation.get('url', '')}\n"
                )

        if url_citations:
            parts.append(f"\n🌐 **Additional URLs ({len(url_citations)}):**\n")
            for i, citation in enumerate(url_citations, 1):
                url = citation.get("url", "")
                venue = _detect_venue_tier(url)
                venue_str = f" [{venue}]" if venue else ""
                parts.append(f"{i}. {url}{venue_str}\n")

    usage = result.get("usage")
    if usage:
        parts.append(f"\n**Usage:** {usage.get('total_tokens', 'N/A')} tokens")

    return "".join(parts)
|
||||
|
||||
|
||||
def _detect_venue_tier(url: str) -> Optional[str]:
    """Map a source URL to a human-readable venue label, if its host is recognized.

    The host name is extracted from ``url`` and compared against known
    publisher domains, checking Tier 1 first, then Tier 2, then Tier 3. A
    domain matches when the host equals it or is a subdomain of it, so
    look-alike hosts such as ``signature.com`` are not mistaken for
    ``nature.com``. The bare ``pubmed`` keyword matches a host label or a path
    segment.

    Args:
        url: Source URL; a missing scheme is tolerated.

    Returns:
        The venue label, or ``None`` when ``url`` is empty, unparsable or
        unrecognized.
    """
    from urllib.parse import urlparse  # local import: only this helper needs it

    if not url:
        return None

    url_lower = url.strip().lower()
    # urlparse only fills in the host when an authority marker is present.
    if "://" not in url_lower and not url_lower.startswith("//"):
        url_lower = f"//{url_lower}"
    try:
        parsed = urlparse(url_lower)
        host = parsed.hostname or ""
    except ValueError:
        # Malformed authority section (e.g. an unbalanced IPv6 bracket).
        return None
    if not host:
        return None

    host_labels = host.split(".")
    path_segments = parsed.path.split("/")

    # Tier 1 - Premier venues
    tier1_indicators = {
        "nature.com": "Nature (Tier 1)",
        "science.org": "Science (Tier 1)",
        "cell.com": "Cell Press (Tier 1)",
        "nejm.org": "NEJM (Tier 1)",
        "thelancet.com": "Lancet (Tier 1)",
        "jamanetwork.com": "JAMA (Tier 1)",
        "pnas.org": "PNAS (Tier 1)",
    }

    # Tier 2 - High-impact specialized
    tier2_indicators = {
        "neurips.cc": "NeurIPS (Tier 2 - Top ML)",
        "icml.cc": "ICML (Tier 2 - Top ML)",
        "openreview.net": "Top ML Conference (Tier 2)",
        "aacrjournals.org": "AACR Journals (Tier 2)",
        "ahajournals.org": "AHA Journals (Tier 2)",
        "bloodjournal.org": "Blood (Tier 2)",
        "jci.org": "JCI (Tier 2)",
    }

    # Tier 3 - Respected academic sources
    tier3_indicators = {
        "springer.com": "Springer",
        "wiley.com": "Wiley",
        "elsevier.com": "Elsevier",
        "oup.com": "Oxford University Press",
        "arxiv.org": "arXiv (Preprint)",
        "biorxiv.org": "bioRxiv (Preprint)",
        "medrxiv.org": "medRxiv (Preprint)",
        "pubmed": "PubMed",
        "ncbi.nlm.nih.gov": "NCBI/PubMed",
        "ieee.org": "IEEE",
        "acm.org": "ACM",
    }

    def _matches(indicator: str) -> bool:
        # Dotted indicators are domains: require an exact host or a subdomain.
        if "." in indicator:
            return host == indicator or host.endswith("." + indicator)
        # Bare keywords must be a whole host label or path segment.
        return indicator in host_labels or indicator in path_segments

    # Higher tiers are checked first so they win over lower ones.
    for tier in (tier1_indicators, tier2_indicators, tier3_indicators):
        for indicator, label in tier.items():
            if _matches(indicator):
                return label

    return None
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: research the query given in ``sys.argv`` and print it.

    Returns:
        0 when the lookup reports success; 1 when OPENROUTER_API_KEY is unset,
        no query was given, the lookup reports failure, or an exception is
        raised while running it.
    """
    # Check for API key
    api_key_present = bool(os.getenv("OPENROUTER_API_KEY"))
    if not api_key_present:
        print("❌ Error: OPENROUTER_API_KEY environment variable not set")
        print("Please set it in your .env file or export it:")
        print(" export OPENROUTER_API_KEY='your_openrouter_api_key'")
        return 1

    # Get query from command line arguments
    cli_words = sys.argv[1:]
    if not cli_words:
        print("❌ Error: No query provided")
        print("Usage: python lookup.py 'your research query here'")
        return 1

    # All arguments together form one query, so quoting is optional.
    query = " ".join(cli_words)

    try:
        # Initialize research tool
        research = ResearchLookup()

        # Perform lookup
        print(f"🔍 Researching: {query}")
        result = research.lookup(query)

        # Format and output result
        print(format_response(result))

        # Translate the success flag into a process exit code
        exit_code = 0 if result["success"] else 1
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return 1

    return exit_code
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # sys.exit is the supported way to set the process exit status; the bare
    # exit() helper is injected by the site module for interactive use and is
    # not guaranteed to exist (e.g. when Python runs with -S).
    sys.exit(main())
|
||||
@@ -1,208 +1,269 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Research Information Lookup Tool
|
||||
Uses Perplexity's Sonar Pro Search model through OpenRouter for academic research queries.
|
||||
|
||||
Routes research queries to the best backend:
|
||||
- Parallel Chat API (core model): Default for all general research queries
|
||||
- Perplexity sonar-pro-search (via OpenRouter): Academic-specific paper searches
|
||||
|
||||
Environment variables:
|
||||
PARALLEL_API_KEY - Required for Parallel Chat API (primary backend)
|
||||
OPENROUTER_API_KEY - Required for Perplexity academic searches (fallback)
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import requests
|
||||
import re
|
||||
import time
|
||||
import requests
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Any
|
||||
from urllib.parse import quote
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
class ResearchLookup:
|
||||
"""Research information lookup using Perplexity Sonar models via OpenRouter."""
|
||||
"""Research information lookup with intelligent backend routing.
|
||||
|
||||
# Available models
|
||||
MODELS = {
|
||||
"pro": "perplexity/sonar-pro", # Fast lookup, cost-effective
|
||||
"reasoning": "perplexity/sonar-reasoning-pro", # Deep analysis with reasoning
|
||||
}
|
||||
Routes queries to the Parallel Chat API (default) or Perplexity
|
||||
sonar-pro-search (academic paper searches only).
|
||||
"""
|
||||
|
||||
# Keywords that indicate complex queries requiring reasoning model
|
||||
REASONING_KEYWORDS = [
|
||||
"compare", "contrast", "analyze", "analysis", "evaluate", "critique",
|
||||
"versus", "vs", "vs.", "compared to", "differences between", "similarities",
|
||||
"meta-analysis", "systematic review", "synthesis", "integrate",
|
||||
"mechanism", "why", "how does", "how do", "explain", "relationship",
|
||||
"theoretical framework", "implications", "interpret", "reasoning",
|
||||
"controversy", "conflicting", "paradox", "debate", "reconcile",
|
||||
"pros and cons", "advantages and disadvantages", "trade-off", "tradeoff",
|
||||
ACADEMIC_KEYWORDS = [
|
||||
"find papers", "find paper", "find articles", "find article",
|
||||
"cite ", "citation", "citations for",
|
||||
"doi ", "doi:", "pubmed", "pmid",
|
||||
"journal article", "peer-reviewed",
|
||||
"systematic review", "meta-analysis",
|
||||
"literature search", "literature on",
|
||||
"academic papers", "academic paper",
|
||||
"research papers on", "research paper on",
|
||||
"published studies", "published study",
|
||||
"scholarly", "scholar",
|
||||
"arxiv", "preprint",
|
||||
"foundational papers", "seminal papers", "landmark papers",
|
||||
"highly cited", "most cited",
|
||||
]
|
||||
|
||||
def __init__(self, force_model: Optional[str] = None):
|
||||
"""
|
||||
Initialize the research lookup tool.
|
||||
|
||||
Args:
|
||||
force_model: Optional model override ('pro' or 'reasoning').
|
||||
If None, model is auto-selected based on query complexity.
|
||||
"""
|
||||
self.api_key = os.getenv("OPENROUTER_API_KEY")
|
||||
if not self.api_key:
|
||||
raise ValueError("OPENROUTER_API_KEY environment variable not set")
|
||||
PARALLEL_SYSTEM_PROMPT = (
|
||||
"You are a deep research analyst. Provide a comprehensive, well-cited "
|
||||
"research report on the user's topic. Include:\n"
|
||||
"- Key findings with specific data, statistics, and quantitative evidence\n"
|
||||
"- Detailed analysis organized by themes\n"
|
||||
"- Multiple authoritative sources cited inline\n"
|
||||
"- Methodologies and implications where relevant\n"
|
||||
"- Future outlook and research gaps\n"
|
||||
"Use markdown formatting with clear section headers. "
|
||||
"Prioritize authoritative and recent sources."
|
||||
)
|
||||
|
||||
self.base_url = "https://openrouter.ai/api/v1"
|
||||
self.force_model = force_model
|
||||
self.headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
CHAT_BASE_URL = "https://api.parallel.ai"
|
||||
|
||||
def __init__(self, force_backend: Optional[str] = None):
    """Record the backend preference and detect which API keys are configured.

    Args:
        force_backend: Force a specific backend ('parallel' or 'perplexity').
            If None, backend is auto-selected based on query content.

    Raises:
        ValueError: If neither PARALLEL_API_KEY nor OPENROUTER_API_KEY is set.
    """
    self.force_backend = force_backend

    # A backend counts as available when its key is set to a non-empty value.
    has_parallel_key = bool(os.getenv("PARALLEL_API_KEY"))
    has_openrouter_key = bool(os.getenv("OPENROUTER_API_KEY"))
    self.parallel_available = has_parallel_key
    self.perplexity_available = has_openrouter_key

    if has_parallel_key or has_openrouter_key:
        return

    raise ValueError(
        "No API keys found. Set at least one of:\n"
        " PARALLEL_API_KEY (for Parallel Chat API - primary)\n"
        " OPENROUTER_API_KEY (for Perplexity academic search - fallback)"
    )
|
||||
|
||||
def _select_backend(self, query: str) -> str:
    """Choose which backend ('parallel' or 'perplexity') should answer ``query``.

    A forced backend wins when its API key is available. Otherwise a query
    containing any ACADEMIC_KEYWORDS entry (case-insensitive) goes to
    Perplexity when available, everything else goes to Parallel when
    available, and Perplexity is the last resort.

    Raises:
        ValueError: If no backend is available.
    """
    forced = self.force_backend
    # A forced backend that is unavailable falls through to auto-selection.
    if forced == "perplexity" and self.perplexity_available:
        return "perplexity"
    if forced == "parallel" and self.parallel_available:
        return "parallel"

    lowered = query.lower()
    wants_papers = False
    for keyword in self.ACADEMIC_KEYWORDS:
        if keyword in lowered:
            wants_papers = True
            break

    if wants_papers and self.perplexity_available:
        return "perplexity"

    # General queries prefer Parallel; Perplexity is the fallback.
    fallback_order = (
        ("parallel", self.parallel_available),
        ("perplexity", self.perplexity_available),
    )
    for backend, usable in fallback_order:
        if usable:
            return backend

    raise ValueError("No backend available. Check API keys.")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Parallel Chat API backend
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _get_chat_client(self):
    """Return the cached OpenAI client used for the Parallel Chat API.

    The client is created on first use from PARALLEL_API_KEY and
    ``CHAT_BASE_URL`` and stored on the instance as ``_chat_client``; later
    calls return that same object.

    Raises:
        ImportError: If the 'openai' package is not installed.
    """
    if not hasattr(self, "_chat_client"):
        try:
            from openai import OpenAI
        except ImportError as err:
            # Chain the original failure so the real import problem stays
            # visible in the traceback.
            raise ImportError(
                "The 'openai' package is required for Parallel Chat API.\n"
                "Install it with: pip install openai"
            ) from err
        self._chat_client = OpenAI(
            api_key=os.getenv("PARALLEL_API_KEY"),
            base_url=self.CHAT_BASE_URL,
        )
    return self._chat_client
|
||||
|
||||
def _parallel_lookup(self, query: str) -> Dict[str, Any]:
    """Answer ``query`` with the Parallel Chat API and package the outcome.

    Returns:
        On success, a dict with ``success``, ``query``, ``response``,
        ``citations`` (API sources followed by citations parsed from the
        text), ``sources``, ``timestamp``, ``backend`` and ``model``. If any
        exception is raised, a dict with ``success`` False and the exception
        text under ``error`` instead of the response fields.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    model = "core"
    model_label = f"parallel-chat/{model}"

    try:
        client = self._get_chat_client()

        # Progress goes to stderr so stdout carries only the result.
        print(f"[Research] Parallel Chat API (model={model})...", file=sys.stderr)

        conversation = [
            {"role": "system", "content": self.PARALLEL_SYSTEM_PROMPT},
            {"role": "user", "content": query},
        ]
        response = client.chat.completions.create(
            model=model,
            messages=conversation,
            stream=False,
        )

        # An empty choice list or a None message body yields an empty string.
        if response.choices and len(response.choices) > 0:
            content = response.choices[0].message.content or ""
        else:
            content = ""

        api_citations = self._extract_basis_citations(response)
        text_citations = self._extract_citations_from_text(content)
        combined_citations = api_citations + text_citations
    except Exception as e:
        return {
            "success": False,
            "query": query,
            "error": str(e),
            "timestamp": timestamp,
            "backend": "parallel",
            "model": model_label,
        }
    else:
        return {
            "success": True,
            "query": query,
            "response": content,
            "citations": combined_citations,
            "sources": api_citations,
            "timestamp": timestamp,
            "backend": "parallel",
            "model": model_label,
        }
|
||||
|
||||
def _extract_basis_citations(self, response) -> List[Dict[str, str]]:
    """Collect unique citation sources from the ``basis`` attribute of a response.

    Basis items and their citations may be dicts or attribute-style objects.
    Citations with an empty URL, or a URL that was already collected, are
    skipped.

    Returns:
        A list of dicts with the keys ``type`` (always "source"), ``url``,
        ``title`` and ``excerpts``; empty when the response has no ``basis``
        or ``basis`` is not a list.
    """

    def _read(obj, name, fallback):
        # dict.get for mappings, getattr for attribute-style objects
        if isinstance(obj, dict):
            return obj.get(name, fallback)
        return getattr(obj, name, fallback)

    collected = []
    basis = getattr(response, "basis", None)
    if not basis or not isinstance(basis, list):
        return collected

    seen_urls = set()
    for entry in basis:
        if isinstance(entry, dict):
            entry_citations = entry.get("citations", [])
        else:
            entry_citations = getattr(entry, "citations", None) or []

        for cit in entry_citations:
            url = _read(cit, "url", "")
            if not url or url in seen_urls:
                continue
            seen_urls.add(url)
            collected.append({
                "type": "source",
                "url": url,
                "title": _read(cit, "title", ""),
                "excerpts": _read(cit, "excerpts", []),
            })

    return collected
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Perplexity academic search backend
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _perplexity_lookup(self, query: str) -> Dict[str, Any]:
|
||||
"""Run academic search via Perplexity sonar-pro-search through OpenRouter."""
|
||||
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
api_key = os.getenv("OPENROUTER_API_KEY")
|
||||
model = "perplexity/sonar-pro-search"
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
"HTTP-Referer": "https://scientific-writer.local",
|
||||
"X-Title": "Scientific Writer Research Tool"
|
||||
"X-Title": "Scientific Writer Research Tool",
|
||||
}
|
||||
|
||||
def _select_model(self, query: str) -> str:
|
||||
"""
|
||||
Select the appropriate model based on query complexity.
|
||||
|
||||
Args:
|
||||
query: The research query
|
||||
|
||||
Returns:
|
||||
Model identifier string
|
||||
"""
|
||||
if self.force_model:
|
||||
return self.MODELS.get(self.force_model, self.MODELS["reasoning"])
|
||||
|
||||
# Check for reasoning keywords (case-insensitive)
|
||||
query_lower = query.lower()
|
||||
for keyword in self.REASONING_KEYWORDS:
|
||||
if keyword in query_lower:
|
||||
return self.MODELS["reasoning"]
|
||||
|
||||
# Check for multiple questions or complex structure
|
||||
question_count = query.count("?")
|
||||
if question_count >= 2:
|
||||
return self.MODELS["reasoning"]
|
||||
|
||||
# Check for very long queries (likely complex)
|
||||
if len(query) > 200:
|
||||
return self.MODELS["reasoning"]
|
||||
|
||||
# Default to pro for simple lookups
|
||||
return self.MODELS["pro"]
|
||||
research_prompt = self._format_academic_prompt(query)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are an academic research assistant specializing in finding "
|
||||
"HIGH-IMPACT, INFLUENTIAL research.\n\n"
|
||||
"QUALITY PRIORITIZATION (CRITICAL):\n"
|
||||
"- ALWAYS prefer highly-cited papers over obscure publications\n"
|
||||
"- ALWAYS prioritize Tier-1 venues: Nature, Science, Cell, NEJM, Lancet, JAMA, PNAS\n"
|
||||
"- ALWAYS prefer papers from established researchers\n"
|
||||
"- Include citation counts when known (e.g., 'cited 500+ times')\n"
|
||||
"- Quality matters more than quantity\n\n"
|
||||
"VENUE HIERARCHY:\n"
|
||||
"1. Nature/Science/Cell family, NEJM, Lancet, JAMA (highest)\n"
|
||||
"2. High-impact specialized journals (IF>10), top conferences (NeurIPS, ICML, ICLR)\n"
|
||||
"3. Respected field-specific journals (IF 5-10)\n"
|
||||
"4. Other peer-reviewed sources (only if no better option)\n\n"
|
||||
"Focus exclusively on scholarly sources. Prioritize recent literature (2020-2026) "
|
||||
"and provide complete citations with DOIs."
|
||||
),
|
||||
},
|
||||
{"role": "user", "content": research_prompt},
|
||||
]
|
||||
|
||||
def _make_request(self, messages: List[Dict[str, str]], model: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Make a request to the OpenRouter API with academic search mode."""
|
||||
data = {
|
||||
"model": model,
|
||||
"messages": messages,
|
||||
"max_tokens": 8000,
|
||||
"temperature": 0.1, # Low temperature for factual research
|
||||
# Perplexity-specific parameters for academic search
|
||||
"search_mode": "academic", # Prioritize scholarly sources (peer-reviewed papers, journals)
|
||||
"search_context_size": "high", # Always use high context for deeper research
|
||||
**kwargs
|
||||
"temperature": 0.1,
|
||||
"search_mode": "academic",
|
||||
"search_context_size": "high",
|
||||
}
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{self.base_url}/chat/completions",
|
||||
headers=self.headers,
|
||||
"https://openrouter.ai/api/v1/chat/completions",
|
||||
headers=headers,
|
||||
json=data,
|
||||
timeout=90 # Increased timeout for academic search
|
||||
timeout=90,
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
except requests.exceptions.RequestException as e:
|
||||
raise Exception(f"API request failed: {str(e)}")
|
||||
resp_json = response.json()
|
||||
|
||||
def _format_research_prompt(self, query: str) -> str:
|
||||
"""Format the query for optimal research results."""
|
||||
return f"""You are an expert research assistant. Please provide comprehensive, accurate research information for the following query: "{query}"
|
||||
|
||||
IMPORTANT INSTRUCTIONS:
|
||||
1. Focus on ACADEMIC and SCIENTIFIC sources (peer-reviewed papers, reputable journals, institutional research)
|
||||
2. Include RECENT information (prioritize 2020-2026 publications)
|
||||
3. Provide COMPLETE citations with authors, title, journal/conference, year, and DOI when available
|
||||
4. Structure your response with clear sections and proper attribution
|
||||
5. Be comprehensive but concise - aim for 800-1200 words
|
||||
6. Include key findings, methodologies, and implications when relevant
|
||||
7. Note any controversies, limitations, or conflicting evidence
|
||||
|
||||
PAPER QUALITY AND POPULARITY PRIORITIZATION (CRITICAL):
|
||||
8. ALWAYS prioritize HIGHLY-CITED papers over obscure publications:
|
||||
- Recent papers (0-3 years): prefer 20+ citations, highlight 100+ as highly influential
|
||||
- Mid-age papers (3-7 years): prefer 100+ citations, highlight 500+ as landmark
|
||||
- Older papers (7+ years): prefer 500+ citations, highlight 1000+ as foundational
|
||||
9. ALWAYS prioritize papers from TOP-TIER VENUES:
|
||||
- Tier 1 (highest priority): Nature, Science, Cell, NEJM, Lancet, JAMA, PNAS, Nature Medicine, Nature Biotechnology
|
||||
- Tier 2 (high priority): High-impact specialized journals (IF>10), top conferences (NeurIPS, ICML, ICLR for AI/ML)
|
||||
- Tier 3: Respected specialized journals (IF 5-10)
|
||||
- Only cite lower-tier venues if directly relevant AND no better source exists
|
||||
10. PREFER papers from ESTABLISHED, REPUTABLE AUTHORS:
|
||||
- Senior researchers with high h-index and multiple high-impact publications
|
||||
- Leading research groups at recognized institutions
|
||||
- Authors with recognized expertise (awards, editorial positions)
|
||||
11. For EACH citation, include when available:
|
||||
- Approximate citation count (e.g., "cited 500+ times")
|
||||
- Journal/venue tier indicator
|
||||
- Notable author credentials if relevant
|
||||
12. PRIORITIZE papers that DIRECTLY address the research question over tangentially related work
|
||||
|
||||
RESPONSE FORMAT:
|
||||
- Start with a brief summary (2-3 sentences)
|
||||
- Present key findings and studies in organized sections
|
||||
- Rank papers by impact: most influential/cited first
|
||||
- End with future directions or research gaps if applicable
|
||||
- Include 5-8 high-quality citations, emphasizing Tier-1 venues and highly-cited papers
|
||||
|
||||
Remember: Quality over quantity. Prioritize influential, highly-cited papers from prestigious venues and established researchers."""
|
||||
|
||||
def lookup(self, query: str) -> Dict[str, Any]:
|
||||
"""Perform a research lookup for the given query."""
|
||||
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
# Select model based on query complexity
|
||||
model = self._select_model(query)
|
||||
|
||||
# Format the research prompt
|
||||
research_prompt = self._format_research_prompt(query)
|
||||
|
||||
# Prepare messages for the API with system message for academic mode
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": """You are an academic research assistant specializing in finding HIGH-IMPACT, INFLUENTIAL research.
|
||||
|
||||
QUALITY PRIORITIZATION (CRITICAL):
|
||||
- ALWAYS prefer highly-cited papers over obscure publications
|
||||
- ALWAYS prioritize Tier-1 venues: Nature, Science, Cell, NEJM, Lancet, JAMA, PNAS, and their family journals
|
||||
- ALWAYS prefer papers from established researchers with strong publication records
|
||||
- Include citation counts when known (e.g., "cited 500+ times")
|
||||
- Quality matters more than quantity - 5 excellent papers beats 10 mediocre ones
|
||||
|
||||
VENUE HIERARCHY:
|
||||
1. Nature/Science/Cell family, NEJM, Lancet, JAMA (highest priority)
|
||||
2. High-impact specialized journals (IF>10), top ML conferences (NeurIPS, ICML, ICLR)
|
||||
3. Respected field-specific journals (IF 5-10)
|
||||
4. Other peer-reviewed sources (only if no better option exists)
|
||||
|
||||
Focus exclusively on scholarly sources: peer-reviewed journals, academic papers, research institutions. Prioritize recent academic literature (2020-2026) and provide complete citations with DOIs. Always indicate paper impact through citation counts and venue prestige."""
|
||||
},
|
||||
{"role": "user", "content": research_prompt}
|
||||
]
|
||||
|
||||
try:
|
||||
# Make the API request
|
||||
response = self._make_request(messages, model)
|
||||
|
||||
# Extract the response content
|
||||
if "choices" in response and len(response["choices"]) > 0:
|
||||
choice = response["choices"][0]
|
||||
if "choices" in resp_json and len(resp_json["choices"]) > 0:
|
||||
choice = resp_json["choices"][0]
|
||||
if "message" in choice and "content" in choice["message"]:
|
||||
content = choice["message"]["content"]
|
||||
|
||||
# Extract citations from API response (Perplexity provides these)
|
||||
api_citations = self._extract_api_citations(response, choice)
|
||||
|
||||
# Also extract citations from text as fallback
|
||||
api_citations = self._extract_api_citations(resp_json, choice)
|
||||
text_citations = self._extract_citations_from_text(content)
|
||||
|
||||
# Combine: prioritize API citations, add text citations if no duplicates
|
||||
citations = api_citations + text_citations
|
||||
|
||||
return {
|
||||
@@ -210,10 +271,11 @@ Focus exclusively on scholarly sources: peer-reviewed journals, academic papers,
|
||||
"query": query,
|
||||
"response": content,
|
||||
"citations": citations,
|
||||
"sources": api_citations, # Separate field for API-provided sources
|
||||
"sources": api_citations,
|
||||
"timestamp": timestamp,
|
||||
"backend": "perplexity",
|
||||
"model": model,
|
||||
"usage": response.get("usage", {})
|
||||
"usage": resp_json.get("usage", {}),
|
||||
}
|
||||
else:
|
||||
raise Exception("Invalid response format from API")
|
||||
@@ -226,22 +288,54 @@ Focus exclusively on scholarly sources: peer-reviewed journals, academic papers,
|
||||
"query": query,
|
||||
"error": str(e),
|
||||
"timestamp": timestamp,
|
||||
"model": model
|
||||
"backend": "perplexity",
|
||||
"model": model,
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Shared utilities
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _format_academic_prompt(self, query: str) -> str:
|
||||
"""Format a query for academic research results via Perplexity."""
|
||||
return f"""You are an expert research assistant. Please provide comprehensive, accurate research information for the following query: "{query}"
|
||||
|
||||
IMPORTANT INSTRUCTIONS:
|
||||
1. Focus on ACADEMIC and SCIENTIFIC sources (peer-reviewed papers, reputable journals, institutional research)
|
||||
2. Include RECENT information (prioritize 2020-2026 publications)
|
||||
3. Provide COMPLETE citations with authors, title, journal/conference, year, and DOI when available
|
||||
4. Structure your response with clear sections and proper attribution
|
||||
5. Be comprehensive but concise - aim for 800-1200 words
|
||||
6. Include key findings, methodologies, and implications when relevant
|
||||
7. Note any controversies, limitations, or conflicting evidence
|
||||
|
||||
PAPER QUALITY PRIORITIZATION (CRITICAL):
|
||||
8. ALWAYS prioritize HIGHLY-CITED papers over obscure publications
|
||||
9. ALWAYS prioritize papers from TOP-TIER VENUES (Nature, Science, Cell, NEJM, Lancet, JAMA, PNAS)
|
||||
10. PREFER papers from ESTABLISHED, REPUTABLE AUTHORS
|
||||
11. For EACH citation include when available: citation count, venue tier, author credentials
|
||||
12. PRIORITIZE papers that DIRECTLY address the research question
|
||||
|
||||
RESPONSE FORMAT:
|
||||
- Start with a brief summary (2-3 sentences)
|
||||
- Present key findings and studies in organized sections
|
||||
- Rank papers by impact: most influential/cited first
|
||||
- End with future directions or research gaps if applicable
|
||||
- Include 5-8 high-quality citations
|
||||
|
||||
Remember: Quality over quantity. Prioritize influential, highly-cited papers from prestigious venues."""
|
||||
|
||||
def _extract_api_citations(self, response: Dict[str, Any], choice: Dict[str, Any]) -> List[Dict[str, str]]:
|
||||
"""Extract citations from Perplexity API response fields."""
|
||||
citations = []
|
||||
|
||||
# Perplexity returns citations in search_results field (new format)
|
||||
# Check multiple possible locations where OpenRouter might place them
|
||||
|
||||
search_results = (
|
||||
response.get("search_results") or
|
||||
choice.get("search_results") or
|
||||
choice.get("message", {}).get("search_results") or
|
||||
[]
|
||||
response.get("search_results")
|
||||
or choice.get("search_results")
|
||||
or choice.get("message", {}).get("search_results")
|
||||
or []
|
||||
)
|
||||
|
||||
|
||||
for result in search_results:
|
||||
citation = {
|
||||
"type": "source",
|
||||
@@ -249,162 +343,164 @@ Focus exclusively on scholarly sources: peer-reviewed journals, academic papers,
|
||||
"url": result.get("url", ""),
|
||||
"date": result.get("date", ""),
|
||||
}
|
||||
# Add snippet if available (newer API feature)
|
||||
if result.get("snippet"):
|
||||
citation["snippet"] = result.get("snippet")
|
||||
citation["snippet"] = result["snippet"]
|
||||
citations.append(citation)
|
||||
|
||||
# Also check for legacy citations field (backward compatibility)
|
||||
|
||||
legacy_citations = (
|
||||
response.get("citations") or
|
||||
choice.get("citations") or
|
||||
choice.get("message", {}).get("citations") or
|
||||
[]
|
||||
response.get("citations")
|
||||
or choice.get("citations")
|
||||
or choice.get("message", {}).get("citations")
|
||||
or []
|
||||
)
|
||||
|
||||
|
||||
for url in legacy_citations:
|
||||
if isinstance(url, str):
|
||||
# Legacy format was just URLs
|
||||
citations.append({
|
||||
"type": "source",
|
||||
"url": url,
|
||||
"title": "",
|
||||
"date": ""
|
||||
})
|
||||
citations.append({"type": "source", "url": url, "title": "", "date": ""})
|
||||
elif isinstance(url, dict):
|
||||
citations.append({
|
||||
"type": "source",
|
||||
"url": url.get("url", ""),
|
||||
"title": url.get("title", ""),
|
||||
"date": url.get("date", "")
|
||||
"date": url.get("date", ""),
|
||||
})
|
||||
|
||||
|
||||
return citations
|
||||
|
||||
def _extract_citations_from_text(self, text: str) -> List[Dict[str, str]]:
|
||||
"""Extract potential citations from the response text as fallback."""
|
||||
import re
|
||||
"""Extract DOIs and academic URLs from response text as fallback."""
|
||||
citations = []
|
||||
|
||||
# Look for DOI patterns first (most reliable)
|
||||
# Matches: doi:10.xxx, DOI: 10.xxx, https://doi.org/10.xxx
|
||||
doi_pattern = r'(?:doi[:\s]*|https?://(?:dx\.)?doi\.org/)(10\.[0-9]{4,}/[^\s\)\]\,\[\<\>]+)'
|
||||
doi_matches = re.findall(doi_pattern, text, re.IGNORECASE)
|
||||
seen_dois = set()
|
||||
|
||||
for doi in doi_matches:
|
||||
# Clean up DOI - remove trailing punctuation and brackets
|
||||
doi_clean = doi.strip().rstrip('.,;:)]')
|
||||
doi_clean = doi.strip().rstrip(".,;:)]")
|
||||
if doi_clean and doi_clean not in seen_dois:
|
||||
seen_dois.add(doi_clean)
|
||||
citations.append({
|
||||
"type": "doi",
|
||||
"doi": doi_clean,
|
||||
"url": f"https://doi.org/{doi_clean}"
|
||||
"url": f"https://doi.org/{doi_clean}",
|
||||
})
|
||||
|
||||
# Look for URLs that might be sources
|
||||
url_pattern = r'https?://[^\s\)\]\,\<\>\"\']+(?:arxiv\.org|pubmed|ncbi\.nlm\.nih\.gov|nature\.com|science\.org|wiley\.com|springer\.com|ieee\.org|acm\.org)[^\s\)\]\,\<\>\"\']*'
|
||||
url_pattern = (
|
||||
r'https?://[^\s\)\]\,\<\>\"\']+(?:arxiv\.org|pubmed|ncbi\.nlm\.nih\.gov|'
|
||||
r'nature\.com|science\.org|wiley\.com|springer\.com|ieee\.org|acm\.org)'
|
||||
r'[^\s\)\]\,\<\>\"\']*'
|
||||
)
|
||||
url_matches = re.findall(url_pattern, text, re.IGNORECASE)
|
||||
seen_urls = set()
|
||||
|
||||
|
||||
for url in url_matches:
|
||||
url_clean = url.rstrip('.')
|
||||
url_clean = url.rstrip(".")
|
||||
if url_clean not in seen_urls:
|
||||
seen_urls.add(url_clean)
|
||||
citations.append({
|
||||
"type": "url",
|
||||
"url": url_clean
|
||||
})
|
||||
citations.append({"type": "url", "url": url_clean})
|
||||
|
||||
return citations
|
||||
|
||||
def batch_lookup(self, queries: List[str], delay: float = 1.0) -> List[Dict[str, Any]]:
|
||||
"""Perform multiple research lookups with optional delay between requests."""
|
||||
results = []
|
||||
# ------------------------------------------------------------------
|
||||
# Public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def lookup(self, query: str) -> Dict[str, Any]:
    """Run a single research lookup on whichever backend fits the query.

    The backend is picked by ``self._select_backend``. A result of
    ``"parallel"`` dispatches to ``self._parallel_lookup``; any other
    value dispatches to ``self._perplexity_lookup``.

    Args:
        query: The research question to look up.

    Returns:
        The result dict produced by the selected backend method.
    """
    chosen = self._select_backend(query)
    # Diagnostics go to stderr so stdout carries only the results.
    print(f"[Research] Backend: {chosen} | Query: {query[:80]}...", file=sys.stderr)

    handler = self._parallel_lookup if chosen == "parallel" else self._perplexity_lookup
    return handler(query)
|
||||
|
||||
def batch_lookup(self, queries: List[str], delay: float = 1.0) -> List[Dict[str, Any]]:
    """Perform multiple research lookups with a delay between requests.

    Args:
        queries: Research questions to look up, in order.
        delay: Seconds to sleep before every query except the first, as
            simple rate limiting. Zero or a negative value disables it.

    Returns:
        One result dict per query, in the same order as ``queries``.
    """
    results = []
    total = len(queries)
    for i, query in enumerate(queries):
        # Sleep exactly once between consecutive requests.
        if i > 0 and delay > 0:
            time.sleep(delay)

        results.append(self.lookup(query))

        # Progress goes to stderr only, so stdout (e.g. JSON output) stays clean.
        print(f"[Research] Completed query {i+1}/{total}: {query[:50]}...", file=sys.stderr)
    return results
|
||||
|
||||
def get_model_info(self) -> Dict[str, Any]:
    """Fetch the model listing from ``{self.base_url}/models``.

    Returns:
        The decoded JSON body on success. On any failure (network error,
        non-2xx status, invalid JSON, ...) a dict of the form
        ``{"error": <message>}`` is returned instead of raising.
    """
    try:
        reply = requests.get(
            f"{self.base_url}/models",
            headers=self.headers,
            timeout=30,
        )
        reply.raise_for_status()
        payload = reply.json()
    except Exception as err:
        # Best-effort call: report the failure to the caller as data.
        return {"error": str(err)}
    return payload
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
|
||||
"""Command-line interface for testing the research lookup tool."""
|
||||
"""Command-line interface for the research lookup tool."""
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
parser = argparse.ArgumentParser(description="Research Information Lookup Tool")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Research Information Lookup Tool (Parallel Chat API + Perplexity)",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
# General research (uses Parallel Chat API, core model)
|
||||
python research_lookup.py "latest advances in quantum computing 2025"
|
||||
|
||||
# Academic paper search (auto-routes to Perplexity)
|
||||
python research_lookup.py "find papers on CRISPR gene editing clinical trials"
|
||||
|
||||
# Force a specific backend
|
||||
python research_lookup.py "topic" --force-backend parallel
|
||||
python research_lookup.py "topic" --force-backend perplexity
|
||||
|
||||
# Save output to file
|
||||
python research_lookup.py "topic" -o results.txt
|
||||
|
||||
# JSON output
|
||||
python research_lookup.py "topic" --json -o results.json
|
||||
""",
|
||||
)
|
||||
parser.add_argument("query", nargs="?", help="Research query to look up")
|
||||
parser.add_argument("--model-info", action="store_true", help="Show available models")
|
||||
parser.add_argument("--batch", nargs="+", help="Run multiple queries")
|
||||
parser.add_argument("--force-model", choices=["pro", "reasoning"],
|
||||
help="Force specific model: 'pro' for fast lookup, 'reasoning' for deep analysis")
|
||||
parser.add_argument("-o", "--output", help="Write output to file instead of stdout")
|
||||
parser.add_argument("--json", action="store_true", help="Output results as JSON")
|
||||
parser.add_argument(
|
||||
"--force-backend",
|
||||
choices=["parallel", "perplexity"],
|
||||
help="Force a specific backend (default: auto-select)",
|
||||
)
|
||||
parser.add_argument("-o", "--output", help="Write output to file")
|
||||
parser.add_argument("--json", action="store_true", help="Output as JSON")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Set up output destination
|
||||
|
||||
output_file = None
|
||||
if args.output:
|
||||
output_file = open(args.output, 'w', encoding='utf-8')
|
||||
|
||||
output_file = open(args.output, "w", encoding="utf-8")
|
||||
|
||||
def write_output(text):
|
||||
"""Write to file or stdout."""
|
||||
if output_file:
|
||||
output_file.write(text + '\n')
|
||||
output_file.write(text + "\n")
|
||||
else:
|
||||
print(text)
|
||||
|
||||
# Check for API key
|
||||
if not os.getenv("OPENROUTER_API_KEY"):
|
||||
print("Error: OPENROUTER_API_KEY environment variable not set", file=sys.stderr)
|
||||
print("Please set it in your .env file or export it:", file=sys.stderr)
|
||||
print(" export OPENROUTER_API_KEY='your_openrouter_api_key'", file=sys.stderr)
|
||||
has_parallel = bool(os.getenv("PARALLEL_API_KEY"))
|
||||
has_perplexity = bool(os.getenv("OPENROUTER_API_KEY"))
|
||||
if not has_parallel and not has_perplexity:
|
||||
print("Error: No API keys found. Set at least one:", file=sys.stderr)
|
||||
print(" export PARALLEL_API_KEY='...' (primary - Parallel Chat API)", file=sys.stderr)
|
||||
print(" export OPENROUTER_API_KEY='...' (fallback - Perplexity academic)", file=sys.stderr)
|
||||
if output_file:
|
||||
output_file.close()
|
||||
return 1
|
||||
|
||||
if not args.query and not args.batch:
|
||||
parser.print_help()
|
||||
if output_file:
|
||||
output_file.close()
|
||||
return 1
|
||||
|
||||
try:
|
||||
research = ResearchLookup(force_model=args.force_model)
|
||||
|
||||
if args.model_info:
|
||||
write_output("Available models from OpenRouter:")
|
||||
models = research.get_model_info()
|
||||
if "data" in models:
|
||||
for model in models["data"]:
|
||||
if "perplexity" in model["id"].lower():
|
||||
write_output(f" - {model['id']}: {model.get('name', 'N/A')}")
|
||||
if output_file:
|
||||
output_file.close()
|
||||
return 0
|
||||
|
||||
if not args.query and not args.batch:
|
||||
print("Error: No query provided. Use --model-info to see available models.", file=sys.stderr)
|
||||
if output_file:
|
||||
output_file.close()
|
||||
return 1
|
||||
research = ResearchLookup(force_backend=args.force_backend)
|
||||
|
||||
if args.batch:
|
||||
print(f"Running batch research for {len(args.batch)} queries...", file=sys.stderr)
|
||||
@@ -413,27 +509,24 @@ def main():
|
||||
print(f"Researching: {args.query}", file=sys.stderr)
|
||||
results = [research.lookup(args.query)]
|
||||
|
||||
# Output as JSON if requested
|
||||
if args.json:
|
||||
write_output(json.dumps(results, indent=2, ensure_ascii=False))
|
||||
write_output(json.dumps(results, indent=2, ensure_ascii=False, default=str))
|
||||
if output_file:
|
||||
output_file.close()
|
||||
return 0
|
||||
|
||||
# Display results in human-readable format
|
||||
for i, result in enumerate(results):
|
||||
if result["success"]:
|
||||
write_output(f"\n{'='*80}")
|
||||
write_output(f"Query {i+1}: {result['query']}")
|
||||
write_output(f"Timestamp: {result['timestamp']}")
|
||||
write_output(f"Model: {result['model']}")
|
||||
write_output(f"Backend: {result.get('backend', 'unknown')} | Model: {result.get('model', 'unknown')}")
|
||||
write_output(f"{'='*80}")
|
||||
write_output(result["response"])
|
||||
|
||||
# Display API-provided sources first (most reliable)
|
||||
sources = result.get("sources", [])
|
||||
if sources:
|
||||
write_output(f"\n📚 Sources ({len(sources)}):")
|
||||
write_output(f"\nSources ({len(sources)}):")
|
||||
for j, source in enumerate(sources):
|
||||
title = source.get("title", "Untitled")
|
||||
url = source.get("url", "")
|
||||
@@ -443,11 +536,10 @@ def main():
|
||||
if url:
|
||||
write_output(f" {url}")
|
||||
|
||||
# Display additional text-extracted citations
|
||||
citations = result.get("citations", [])
|
||||
text_citations = [c for c in citations if c.get("type") in ("doi", "url")]
|
||||
if text_citations:
|
||||
write_output(f"\n🔗 Additional References ({len(text_citations)}):")
|
||||
write_output(f"\nAdditional References ({len(text_citations)}):")
|
||||
for j, citation in enumerate(text_citations):
|
||||
if citation.get("type") == "doi":
|
||||
write_output(f" [{j+1}] DOI: {citation.get('doi', '')} - {citation.get('url', '')}")
|
||||
@@ -464,11 +556,11 @@ def main():
|
||||
return 0
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {str(e)}", file=sys.stderr)
|
||||
print(f"Error: {e}", file=sys.stderr)
|
||||
if output_file:
|
||||
output_file.close()
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status. Raising
    # SystemExit directly is what sys.exit() does, and avoids both the
    # site-provided exit() helper and a dependency on a module-level sys import.
    raise SystemExit(main())
|
||||
|
||||
Reference in New Issue
Block a user