mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
Refactor research lookup skill to enhance backend routing and update documentation. The skill now intelligently selects between the Parallel Chat API and Perplexity sonar-pro-search based on query type. Added compatibility notes, license information, and improved descriptions for clarity. Removed outdated example scripts to streamline the codebase.
569 lines
19 KiB
Python
569 lines
19 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Parallel Web Systems API Client
|
|
|
|
Provides web search, URL content extraction, and deep research capabilities
|
|
using the Parallel Web Systems APIs (https://docs.parallel.ai).
|
|
|
|
Primary interface: Parallel Chat API (OpenAI-compatible) for search and research.
|
|
Secondary interface: Extract API for URL verification and special cases.
|
|
|
|
Main classes:
|
|
- ParallelChat: Core Chat API client (base/core models)
|
|
- ParallelSearch: Web search via Chat API (base model)
|
|
- ParallelDeepResearch: Deep research via Chat API (core model)
|
|
- ParallelExtract: URL content extraction (Extract API, verification only)
|
|
|
|
Environment variable required:
|
|
PARALLEL_API_KEY - Your Parallel API key from https://platform.parallel.ai
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import argparse
|
|
from datetime import datetime
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
def _get_api_key():
|
|
"""Validate and return the Parallel API key."""
|
|
api_key = os.getenv("PARALLEL_API_KEY")
|
|
if not api_key:
|
|
raise ValueError(
|
|
"PARALLEL_API_KEY environment variable not set.\n"
|
|
"Get your key at https://platform.parallel.ai and set it:\n"
|
|
" export PARALLEL_API_KEY='your_key_here'"
|
|
)
|
|
return api_key
|
|
|
|
|
|
def _get_extract_client():
|
|
"""Create and return a Parallel SDK client for the Extract API."""
|
|
try:
|
|
from parallel import Parallel
|
|
except ImportError:
|
|
raise ImportError(
|
|
"The 'parallel-web' package is required for extract. Install it with:\n"
|
|
" pip install parallel-web"
|
|
)
|
|
return Parallel(api_key=_get_api_key())
|
|
|
|
|
|
class ParallelChat:
    """Client for the Parallel Chat API (OpenAI-compatible endpoint).

    Wraps an OpenAI SDK client pointed at the Parallel base URL. Each query
    triggers server-side web research and returns a synthesized answer plus
    citation sources parsed from the response's research basis.

    Models:
        - base : Standard research, factual queries (15-100s latency)
        - core : Complex research, multi-source synthesis (60s-5min latency)
    """

    CHAT_BASE_URL = "https://api.parallel.ai"

    def __init__(self):
        # The openai package is imported lazily so the rest of the module
        # (e.g. extract) stays usable without it.
        try:
            from openai import OpenAI
        except ImportError:
            raise ImportError(
                "The 'openai' package is required. Install it with:\n"
                " pip install openai"
            )

        self.client = OpenAI(
            api_key=_get_api_key(),
            base_url=self.CHAT_BASE_URL,
        )

    def query(
        self,
        user_message: str,
        system_message: Optional[str] = None,
        model: str = "base",
    ) -> Dict[str, Any]:
        """Send a query to the Parallel Chat API.

        Args:
            user_message: The research query or question.
            system_message: Optional system prompt to guide response style.
            model: Chat model to use ('base' or 'core').

        Returns:
            On success: dict with 'success'=True, 'content', 'sources',
            'citation_count', 'model', 'timestamp'.
            On failure: dict with 'success'=False, 'error', 'model', 'timestamp'.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        messages = (
            [{"role": "system", "content": system_message}] if system_message else []
        )
        messages.append({"role": "user", "content": user_message})

        try:
            print(f"[Parallel Chat] Querying model={model}...", file=sys.stderr)

            response = self.client.chat.completions.create(
                model=model,
                messages=messages,
                stream=False,
            )

            choices = response.choices
            content = (choices[0].message.content or "") if choices else ""
            sources = self._extract_basis(response)

            return {
                "success": True,
                "content": content,
                "sources": sources,
                "citation_count": len(sources),
                "model": model,
                "timestamp": timestamp,
            }
        except Exception as e:
            # Errors are reported as a structured result, not raised,
            # so CLI callers can render them uniformly.
            return {
                "success": False,
                "error": str(e),
                "model": model,
                "timestamp": timestamp,
            }

    def _extract_basis(self, response) -> List[Dict[str, str]]:
        """Collect unique citation sources from the response's research basis.

        The basis items and citations may arrive as dicts or SDK objects,
        so both access styles are supported. Duplicate URLs are skipped.
        """
        def _field(obj, name, fallback):
            # Uniform accessor for dict-shaped or attribute-shaped citations.
            if isinstance(obj, dict):
                return obj.get(name, fallback)
            return getattr(obj, name, fallback)

        sources: List[Dict[str, str]] = []
        basis = getattr(response, "basis", None)
        if not isinstance(basis, list):
            return sources

        seen = set()
        for item in basis:
            if isinstance(item, dict):
                citations = item.get("citations", [])
            else:
                citations = getattr(item, "citations", None) or []
            for cit in citations:
                url = _field(cit, "url", "")
                if not url or url in seen:
                    continue
                seen.add(url)
                sources.append({
                    "type": "source",
                    "url": url,
                    "title": _field(cit, "title", ""),
                    "excerpts": _field(cit, "excerpts", []),
                })

        return sources
|
|
|
|
|
|
class ParallelSearch:
    """Synthesized web search on top of the Parallel Chat API (base model).

    Delegates to ParallelChat with a search-oriented system prompt and
    reshapes the result into a search-style payload with cited sources.
    """

    SYSTEM_PROMPT = (
        "You are a web research assistant. Search the web and synthesize information "
        "about the user's query. Provide a clear, well-organized summary with:\n"
        "- Key facts, data points, and statistics\n"
        "- Specific names, dates, and numbers when available\n"
        "- Multiple perspectives if the topic is debated\n"
        "Cite your sources inline. Be comprehensive but concise."
    )

    def __init__(self):
        self.chat = ParallelChat()

    def search(
        self,
        objective: str,
        model: str = "base",
    ) -> Dict[str, Any]:
        """Execute a web search via the Chat API.

        Args:
            objective: Natural language description of the search goal.
            model: Chat model to use ('base' or 'core', default 'base').

        Returns:
            On success: dict with 'response' (synthesized text), 'sources',
            'citation_count', 'model', 'backend', 'timestamp'.
            On failure: dict with 'success'=False, 'objective', 'error', 'timestamp'.
        """
        outcome = self.chat.query(
            user_message=objective,
            system_message=self.SYSTEM_PROMPT,
            model=model,
        )

        if outcome["success"]:
            return {
                "success": True,
                "objective": objective,
                "response": outcome["content"],
                "sources": outcome["sources"],
                "citation_count": outcome["citation_count"],
                "model": outcome["model"],
                "backend": "parallel-chat",
                "timestamp": outcome["timestamp"],
            }

        return {
            "success": False,
            "objective": objective,
            "error": outcome.get("error", "Unknown error"),
            "timestamp": outcome["timestamp"],
        }
|
|
|
|
|
|
class ParallelExtract:
    """Fetch clean, LLM-optimized markdown from public URLs via the Extract API.

    Intended for citation verification and special cases only; general
    research should go through ParallelSearch or ParallelDeepResearch.
    """

    def __init__(self):
        self.client = _get_extract_client()

    def extract(
        self,
        urls: List[str],
        objective: Optional[str] = None,
        excerpts: bool = True,
        full_content: bool = False,
    ) -> Dict[str, Any]:
        """Extract content from one or more URLs.

        Args:
            urls: List of URLs to extract content from.
            objective: Optional objective to focus extraction.
            excerpts: Whether to return focused excerpts (default True).
            full_content: Whether to return full page content (default False).

        Returns:
            On success: dict with 'results' (one entry per URL with url, title,
            publish_date, excerpts, full_content), 'errors', 'extract_id'.
            On failure: dict with 'success'=False, 'urls', 'error', 'timestamp'.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        params: Dict[str, Any] = {
            "urls": urls,
            "excerpts": excerpts,
            "full_content": full_content,
        }
        # Only pass objective when provided; the API treats absence as "general".
        if objective:
            params["objective"] = objective

        try:
            response = self.client.beta.extract(**params)

            rows = []
            for r in getattr(response, "results", None) or []:
                rows.append({
                    "url": getattr(r, "url", ""),
                    "title": getattr(r, "title", ""),
                    "publish_date": getattr(r, "publish_date", None),
                    "excerpts": getattr(r, "excerpts", []),
                    "full_content": getattr(r, "full_content", None),
                })

            errors = [str(err) for err in (getattr(response, "errors", None) or [])]

            return {
                "success": True,
                "urls": urls,
                "results": rows,
                "errors": errors,
                "timestamp": timestamp,
                "extract_id": getattr(response, "extract_id", None),
            }
        except Exception as e:
            return {
                "success": False,
                "urls": urls,
                "error": str(e),
                "timestamp": timestamp,
            }
|
|
|
|
|
|
class ParallelDeepResearch:
    """Comprehensive deep research on top of the Parallel Chat API (core model).

    Delegates to ParallelChat with a report-oriented system prompt and returns
    a markdown report with citations. Legacy aliases ('output', 'citations')
    are included alongside 'response'/'sources' in the result.
    """

    SYSTEM_PROMPT = (
        "You are a deep research analyst. Provide a comprehensive, well-structured "
        "research report on the user's topic. Include:\n"
        "- Executive summary of key findings\n"
        "- Detailed analysis organized by themes\n"
        "- Specific data, statistics, and quantitative evidence\n"
        "- Multiple authoritative sources\n"
        "- Implications and future outlook where relevant\n"
        "Use markdown formatting with clear section headers. "
        "Cite all sources inline."
    )

    def __init__(self):
        self.chat = ParallelChat()

    def research(
        self,
        query: str,
        model: str = "core",
        system_prompt: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Run deep research via the Chat API.

        Args:
            query: The research question or topic.
            model: Chat model to use ('base' or 'core', default 'core').
            system_prompt: Optional override for the system prompt.

        Returns:
            On success: dict with 'response'/'output' (markdown report),
            'citations'/'sources', 'citation_count', 'model', 'backend', 'timestamp'.
            On failure: dict with 'success'=False, 'query', 'error', 'model', 'timestamp'.
        """
        outcome = self.chat.query(
            user_message=query,
            system_message=system_prompt or self.SYSTEM_PROMPT,
            model=model,
        )

        if not outcome["success"]:
            return {
                "success": False,
                "query": query,
                "error": outcome.get("error", "Unknown error"),
                "model": model,
                "timestamp": outcome["timestamp"],
            }

        report = outcome["content"]
        cited = outcome["sources"]
        return {
            "success": True,
            "query": query,
            "response": report,
            "output": report,       # legacy alias for 'response'
            "citations": cited,
            "sources": cited,       # legacy alias for 'citations'
            "citation_count": outcome["citation_count"],
            "model": model,
            "backend": "parallel-chat",
            "timestamp": outcome["timestamp"],
        }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI Interface
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _print_search_results(result: Dict[str, Any], output_file=None):
|
|
"""Print search results (synthesized summary + sources)."""
|
|
def write(text):
|
|
if output_file:
|
|
output_file.write(text + "\n")
|
|
else:
|
|
print(text)
|
|
|
|
if not result["success"]:
|
|
write(f"Error: {result.get('error', 'Unknown error')}")
|
|
return
|
|
|
|
write(f"\n{'='*80}")
|
|
write(f"Search: {result['objective']}")
|
|
write(f"Model: {result['model']} | Time: {result['timestamp']}")
|
|
write(f"{'='*80}\n")
|
|
|
|
write(result.get("response", "No response received."))
|
|
|
|
sources = result.get("sources", [])
|
|
if sources:
|
|
write(f"\n\n{'='*40} SOURCES {'='*40}")
|
|
for i, src in enumerate(sources):
|
|
title = src.get("title", "Untitled")
|
|
url = src.get("url", "")
|
|
write(f" [{i+1}] {title}")
|
|
if url:
|
|
write(f" {url}")
|
|
|
|
|
|
def _print_extract_results(result: Dict[str, Any], output_file=None):
|
|
"""Pretty-print extract results."""
|
|
def write(text):
|
|
if output_file:
|
|
output_file.write(text + "\n")
|
|
else:
|
|
print(text)
|
|
|
|
if not result["success"]:
|
|
write(f"Error: {result.get('error', 'Unknown error')}")
|
|
return
|
|
|
|
write(f"\n{'='*80}")
|
|
write(f"Extracted from: {', '.join(result['urls'])}")
|
|
write(f"Time: {result['timestamp']}")
|
|
write(f"{'='*80}")
|
|
|
|
for i, r in enumerate(result["results"]):
|
|
write(f"\n--- [{i+1}] {r['title']} ---")
|
|
write(f"URL: {r['url']}")
|
|
if r.get("full_content"):
|
|
write(f"\n{r['full_content']}")
|
|
elif r.get("excerpts"):
|
|
for j, excerpt in enumerate(r["excerpts"]):
|
|
write(f"\nExcerpt {j+1}:")
|
|
write(excerpt[:2000] if len(excerpt) > 2000 else excerpt)
|
|
|
|
if result.get("errors"):
|
|
write(f"\nErrors: {result['errors']}")
|
|
|
|
|
|
def _print_research_results(result: Dict[str, Any], output_file=None):
|
|
"""Print deep research results (report + sources)."""
|
|
def write(text):
|
|
if output_file:
|
|
output_file.write(text + "\n")
|
|
else:
|
|
print(text)
|
|
|
|
if not result["success"]:
|
|
write(f"Error: {result.get('error', 'Unknown error')}")
|
|
return
|
|
|
|
write(f"\n{'='*80}")
|
|
query_display = result['query'][:100]
|
|
if len(result['query']) > 100:
|
|
query_display += "..."
|
|
write(f"Research: {query_display}")
|
|
write(f"Model: {result['model']} | Citations: {result.get('citation_count', 0)} | Time: {result['timestamp']}")
|
|
write(f"{'='*80}\n")
|
|
|
|
write(result.get("response", result.get("output", "No output received.")))
|
|
|
|
citations = result.get("citations", result.get("sources", []))
|
|
if citations:
|
|
write(f"\n\n{'='*40} SOURCES {'='*40}")
|
|
seen_urls = set()
|
|
for cit in citations:
|
|
url = cit.get("url", "")
|
|
if url and url not in seen_urls:
|
|
seen_urls.add(url)
|
|
title = cit.get("title", "Untitled")
|
|
write(f" [{len(seen_urls)}] {title}")
|
|
write(f" {url}")
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(
|
|
description="Parallel Web Systems API Client - Search, Extract, and Deep Research",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog="""
|
|
Examples:
|
|
python parallel_web.py search "latest advances in quantum computing"
|
|
python parallel_web.py search "climate policy 2025" --model core
|
|
python parallel_web.py extract "https://example.com" --objective "key findings"
|
|
python parallel_web.py research "comprehensive analysis of EV battery market"
|
|
python parallel_web.py research "compare mRNA vs protein subunit vaccines" --model base
|
|
python parallel_web.py research "AI regulation landscape 2025" -o report.md
|
|
""",
|
|
)
|
|
|
|
subparsers = parser.add_subparsers(dest="command", help="API command")
|
|
|
|
# --- search subcommand ---
|
|
search_parser = subparsers.add_parser("search", help="Web search via Chat API (synthesized results)")
|
|
search_parser.add_argument("objective", help="Natural language search objective")
|
|
search_parser.add_argument("--model", default="base", choices=["base", "core"],
|
|
help="Chat model to use (default: base)")
|
|
search_parser.add_argument("-o", "--output", help="Write output to file")
|
|
search_parser.add_argument("--json", action="store_true", help="Output as JSON")
|
|
|
|
# --- extract subcommand ---
|
|
extract_parser = subparsers.add_parser("extract", help="Extract content from URLs (verification only)")
|
|
extract_parser.add_argument("urls", nargs="+", help="One or more URLs to extract")
|
|
extract_parser.add_argument("--objective", help="Objective to focus extraction")
|
|
extract_parser.add_argument("--full-content", action="store_true", help="Return full page content")
|
|
extract_parser.add_argument("-o", "--output", help="Write output to file")
|
|
extract_parser.add_argument("--json", action="store_true", help="Output as JSON")
|
|
|
|
# --- research subcommand ---
|
|
research_parser = subparsers.add_parser("research", help="Deep research via Chat API (comprehensive report)")
|
|
research_parser.add_argument("query", help="Research question or topic")
|
|
research_parser.add_argument("--model", default="core", choices=["base", "core"],
|
|
help="Chat model to use (default: core)")
|
|
research_parser.add_argument("-o", "--output", help="Write output to file")
|
|
research_parser.add_argument("--json", action="store_true", help="Output as JSON")
|
|
|
|
args = parser.parse_args()
|
|
|
|
if not args.command:
|
|
parser.print_help()
|
|
return 1
|
|
|
|
output_file = None
|
|
if hasattr(args, "output") and args.output:
|
|
output_file = open(args.output, "w", encoding="utf-8")
|
|
|
|
try:
|
|
if args.command == "search":
|
|
searcher = ParallelSearch()
|
|
result = searcher.search(
|
|
objective=args.objective,
|
|
model=args.model,
|
|
)
|
|
if args.json:
|
|
text = json.dumps(result, indent=2, ensure_ascii=False, default=str)
|
|
(output_file or sys.stdout).write(text + "\n")
|
|
else:
|
|
_print_search_results(result, output_file)
|
|
|
|
elif args.command == "extract":
|
|
extractor = ParallelExtract()
|
|
result = extractor.extract(
|
|
urls=args.urls,
|
|
objective=args.objective,
|
|
full_content=args.full_content,
|
|
)
|
|
if args.json:
|
|
text = json.dumps(result, indent=2, ensure_ascii=False, default=str)
|
|
(output_file or sys.stdout).write(text + "\n")
|
|
else:
|
|
_print_extract_results(result, output_file)
|
|
|
|
elif args.command == "research":
|
|
researcher = ParallelDeepResearch()
|
|
result = researcher.research(
|
|
query=args.query,
|
|
model=args.model,
|
|
)
|
|
if args.json:
|
|
text = json.dumps(result, indent=2, ensure_ascii=False, default=str)
|
|
(output_file or sys.stdout).write(text + "\n")
|
|
else:
|
|
_print_research_results(result, output_file)
|
|
|
|
return 0
|
|
|
|
except Exception as e:
|
|
print(f"Error: {e}", file=sys.stderr)
|
|
return 1
|
|
|
|
finally:
|
|
if output_file:
|
|
output_file.close()
|
|
|
|
|
|
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main())
|