Mirror of https://github.com/K-Dense-AI/claude-scientific-skills.git
(synced 2026-03-27 07:09:27 +08:00)
Added parallel-web skill
Refactor research lookup skill to enhance backend routing and update documentation. The skill now intelligently selects between the Parallel Chat API and Perplexity sonar-pro-search based on query type. Added compatibility notes, license information, and improved descriptions for clarity. Removed outdated example scripts to streamline the codebase.
This commit is contained in:
568
scientific-skills/parallel-web/scripts/parallel_web.py
Normal file
568
scientific-skills/parallel-web/scripts/parallel_web.py
Normal file
@@ -0,0 +1,568 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Parallel Web Systems API Client
|
||||
|
||||
Provides web search, URL content extraction, and deep research capabilities
|
||||
using the Parallel Web Systems APIs (https://docs.parallel.ai).
|
||||
|
||||
Primary interface: Parallel Chat API (OpenAI-compatible) for search and research.
|
||||
Secondary interface: Extract API for URL verification and special cases.
|
||||
|
||||
Main classes:
|
||||
- ParallelChat: Core Chat API client (base/core models)
|
||||
- ParallelSearch: Web search via Chat API (base model)
|
||||
- ParallelDeepResearch: Deep research via Chat API (core model)
|
||||
- ParallelExtract: URL content extraction (Extract API, verification only)
|
||||
|
||||
Environment variable required:
|
||||
PARALLEL_API_KEY - Your Parallel API key from https://platform.parallel.ai
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
def _get_api_key():
|
||||
"""Validate and return the Parallel API key."""
|
||||
api_key = os.getenv("PARALLEL_API_KEY")
|
||||
if not api_key:
|
||||
raise ValueError(
|
||||
"PARALLEL_API_KEY environment variable not set.\n"
|
||||
"Get your key at https://platform.parallel.ai and set it:\n"
|
||||
" export PARALLEL_API_KEY='your_key_here'"
|
||||
)
|
||||
return api_key
|
||||
|
||||
|
||||
def _get_extract_client():
|
||||
"""Create and return a Parallel SDK client for the Extract API."""
|
||||
try:
|
||||
from parallel import Parallel
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"The 'parallel-web' package is required for extract. Install it with:\n"
|
||||
" pip install parallel-web"
|
||||
)
|
||||
return Parallel(api_key=_get_api_key())
|
||||
|
||||
|
||||
class ParallelChat:
    """Core client for the Parallel Chat API.

    OpenAI-compatible chat completions endpoint that performs web research
    and returns synthesized responses with citations.

    Models:
        - base : Standard research, factual queries (15-100s latency)
        - core : Complex research, multi-source synthesis (60s-5min latency)
    """

    # Parallel's Chat API speaks the OpenAI wire protocol at this base URL.
    CHAT_BASE_URL = "https://api.parallel.ai"

    def __init__(self):
        try:
            from openai import OpenAI
        except ImportError:
            raise ImportError(
                "The 'openai' package is required. Install it with:\n"
                "  pip install openai"
            )

        self.client = OpenAI(
            api_key=_get_api_key(),
            base_url=self.CHAT_BASE_URL,
        )

    def query(
        self,
        user_message: str,
        system_message: Optional[str] = None,
        model: str = "base",
    ) -> Dict[str, Any]:
        """Send a query to the Parallel Chat API.

        Args:
            user_message: The research query or question.
            system_message: Optional system prompt to guide response style.
            model: Chat model to use ('base' or 'core').

        Returns:
            Dict with 'content' (response text), 'sources' (citations), and
            metadata. On failure, 'success' is False and 'error' carries the
            exception text instead of 'content'/'sources'.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        messages = []
        if system_message:
            messages.append({"role": "system", "content": system_message})
        messages.append({"role": "user", "content": user_message})

        try:
            # Progress note goes to stderr so stdout stays clean for results.
            print(f"[Parallel Chat] Querying model={model}...", file=sys.stderr)

            response = self.client.chat.completions.create(
                model=model,
                messages=messages,
                stream=False,
            )

            content = ""
            if response.choices and len(response.choices) > 0:
                content = response.choices[0].message.content or ""

            sources = self._extract_basis(response)

            return {
                "success": True,
                "content": content,
                "sources": sources,
                "citation_count": len(sources),
                "model": model,
                "timestamp": timestamp,
            }

        except Exception as e:
            # Boundary handler: surface the failure as data so the CLI can
            # report it instead of crashing with a traceback.
            return {
                "success": False,
                "error": str(e),
                "model": model,
                "timestamp": timestamp,
            }

    @staticmethod
    def _cit_field(obj: Any, name: str, default: Any) -> Any:
        """Read *name* from a basis/citation entry that may be a dict or object.

        Object-shaped entries with a falsy value (e.g. None) are normalized
        to *default* so callers always receive a usable value.
        """
        if isinstance(obj, dict):
            return obj.get(name, default)
        return getattr(obj, name, None) or default

    def _extract_basis(self, response) -> List[Dict[str, str]]:
        """Extract deduplicated citation sources from the Chat API research basis.

        Returns a list of {'type', 'url', 'title', 'excerpts'} dicts, keeping
        only the first occurrence of each URL and skipping entries without one.
        """
        sources: List[Dict[str, str]] = []
        basis = getattr(response, "basis", None)
        if not basis or not isinstance(basis, list):
            return sources

        seen_urls = set()
        for item in basis:
            for cit in self._cit_field(item, "citations", []):
                url = self._cit_field(cit, "url", "")
                if not url or url in seen_urls:
                    continue
                seen_urls.add(url)
                sources.append({
                    "type": "source",
                    "url": url,
                    "title": self._cit_field(cit, "title", ""),
                    "excerpts": self._cit_field(cit, "excerpts", []),
                })

        return sources
|
||||
|
||||
|
||||
class ParallelSearch:
    """Web search using the Parallel Chat API (base model).

    Sends a search query to the Chat API which performs web research and
    returns a synthesized summary with cited sources.
    """

    SYSTEM_PROMPT = (
        "You are a web research assistant. Search the web and synthesize information "
        "about the user's query. Provide a clear, well-organized summary with:\n"
        "- Key facts, data points, and statistics\n"
        "- Specific names, dates, and numbers when available\n"
        "- Multiple perspectives if the topic is debated\n"
        "Cite your sources inline. Be comprehensive but concise."
    )

    def __init__(self):
        self.chat = ParallelChat()

    def search(
        self,
        objective: str,
        model: str = "base",
    ) -> Dict[str, Any]:
        """Execute a web search via the Chat API.

        Args:
            objective: Natural language description of the search goal.
            model: Chat model to use ('base' or 'core', default 'base').

        Returns:
            Dict with 'response' (synthesized text), 'sources', and metadata.
        """
        outcome = self.chat.query(
            user_message=objective,
            system_message=self.SYSTEM_PROMPT,
            model=model,
        )

        if outcome["success"]:
            return {
                "success": True,
                "objective": objective,
                "response": outcome["content"],
                "sources": outcome["sources"],
                "citation_count": outcome["citation_count"],
                "model": outcome["model"],
                "backend": "parallel-chat",
                "timestamp": outcome["timestamp"],
            }

        return {
            "success": False,
            "objective": objective,
            "error": outcome.get("error", "Unknown error"),
            "timestamp": outcome["timestamp"],
        }
|
||||
|
||||
|
||||
class ParallelExtract:
    """Extract clean content from URLs using Parallel's Extract API.

    Converts any public URL into clean, LLM-optimized markdown.
    Use for citation verification and special cases only.
    For general research, use ParallelSearch or ParallelDeepResearch instead.
    """

    def __init__(self):
        self.client = _get_extract_client()

    def extract(
        self,
        urls: List[str],
        objective: Optional[str] = None,
        excerpts: bool = True,
        full_content: bool = False,
    ) -> Dict[str, Any]:
        """Extract content from one or more URLs.

        Args:
            urls: List of URLs to extract content from.
            objective: Optional objective to focus extraction.
            excerpts: Whether to return focused excerpts (default True).
            full_content: Whether to return full page content (default False).

        Returns:
            Dict with 'results' list containing url, title, excerpts/content.
            On failure, 'success' is False and 'error' holds the message.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        request = {
            "urls": urls,
            "excerpts": excerpts,
            "full_content": full_content,
        }
        if objective:
            request["objective"] = objective

        try:
            response = self.client.beta.extract(**request)

            raw_results = getattr(response, "results", None) or []
            results = [
                {
                    "url": getattr(entry, "url", ""),
                    "title": getattr(entry, "title", ""),
                    "publish_date": getattr(entry, "publish_date", None),
                    "excerpts": getattr(entry, "excerpts", []),
                    "full_content": getattr(entry, "full_content", None),
                }
                for entry in raw_results
            ]

            raw_errors = getattr(response, "errors", None) or []
            errors = [str(err) for err in raw_errors]

            return {
                "success": True,
                "urls": urls,
                "results": results,
                "errors": errors,
                "timestamp": timestamp,
                "extract_id": getattr(response, "extract_id", None),
            }

        except Exception as e:
            return {
                "success": False,
                "urls": urls,
                "error": str(e),
                "timestamp": timestamp,
            }
|
||||
|
||||
|
||||
class ParallelDeepResearch:
    """Deep research using the Parallel Chat API (core model).

    Sends complex research queries to the Chat API which performs
    multi-source web research and returns comprehensive reports with citations.
    """

    SYSTEM_PROMPT = (
        "You are a deep research analyst. Provide a comprehensive, well-structured "
        "research report on the user's topic. Include:\n"
        "- Executive summary of key findings\n"
        "- Detailed analysis organized by themes\n"
        "- Specific data, statistics, and quantitative evidence\n"
        "- Multiple authoritative sources\n"
        "- Implications and future outlook where relevant\n"
        "Use markdown formatting with clear section headers. "
        "Cite all sources inline."
    )

    def __init__(self):
        self.chat = ParallelChat()

    def research(
        self,
        query: str,
        model: str = "core",
        system_prompt: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Run deep research via the Chat API.

        Args:
            query: The research question or topic.
            model: Chat model to use ('base' or 'core', default 'core').
            system_prompt: Optional override for the system prompt.

        Returns:
            Dict with 'response' (markdown report), 'citations', and metadata.
        """
        outcome = self.chat.query(
            user_message=query,
            system_message=system_prompt if system_prompt else self.SYSTEM_PROMPT,
            model=model,
        )

        if not outcome["success"]:
            return {
                "success": False,
                "query": query,
                "error": outcome.get("error", "Unknown error"),
                "model": model,
                "timestamp": outcome["timestamp"],
            }

        report = outcome["content"]
        cited = outcome["sources"]
        # The same data is exposed under both key namings
        # ('response'/'output', 'citations'/'sources').
        return {
            "success": True,
            "query": query,
            "response": report,
            "output": report,
            "citations": cited,
            "sources": cited,
            "citation_count": outcome["citation_count"],
            "model": model,
            "backend": "parallel-chat",
            "timestamp": outcome["timestamp"],
        }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI Interface
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _print_search_results(result: Dict[str, Any], output_file=None):
|
||||
"""Print search results (synthesized summary + sources)."""
|
||||
def write(text):
|
||||
if output_file:
|
||||
output_file.write(text + "\n")
|
||||
else:
|
||||
print(text)
|
||||
|
||||
if not result["success"]:
|
||||
write(f"Error: {result.get('error', 'Unknown error')}")
|
||||
return
|
||||
|
||||
write(f"\n{'='*80}")
|
||||
write(f"Search: {result['objective']}")
|
||||
write(f"Model: {result['model']} | Time: {result['timestamp']}")
|
||||
write(f"{'='*80}\n")
|
||||
|
||||
write(result.get("response", "No response received."))
|
||||
|
||||
sources = result.get("sources", [])
|
||||
if sources:
|
||||
write(f"\n\n{'='*40} SOURCES {'='*40}")
|
||||
for i, src in enumerate(sources):
|
||||
title = src.get("title", "Untitled")
|
||||
url = src.get("url", "")
|
||||
write(f" [{i+1}] {title}")
|
||||
if url:
|
||||
write(f" {url}")
|
||||
|
||||
|
||||
def _print_extract_results(result: Dict[str, Any], output_file=None):
|
||||
"""Pretty-print extract results."""
|
||||
def write(text):
|
||||
if output_file:
|
||||
output_file.write(text + "\n")
|
||||
else:
|
||||
print(text)
|
||||
|
||||
if not result["success"]:
|
||||
write(f"Error: {result.get('error', 'Unknown error')}")
|
||||
return
|
||||
|
||||
write(f"\n{'='*80}")
|
||||
write(f"Extracted from: {', '.join(result['urls'])}")
|
||||
write(f"Time: {result['timestamp']}")
|
||||
write(f"{'='*80}")
|
||||
|
||||
for i, r in enumerate(result["results"]):
|
||||
write(f"\n--- [{i+1}] {r['title']} ---")
|
||||
write(f"URL: {r['url']}")
|
||||
if r.get("full_content"):
|
||||
write(f"\n{r['full_content']}")
|
||||
elif r.get("excerpts"):
|
||||
for j, excerpt in enumerate(r["excerpts"]):
|
||||
write(f"\nExcerpt {j+1}:")
|
||||
write(excerpt[:2000] if len(excerpt) > 2000 else excerpt)
|
||||
|
||||
if result.get("errors"):
|
||||
write(f"\nErrors: {result['errors']}")
|
||||
|
||||
|
||||
def _print_research_results(result: Dict[str, Any], output_file=None):
|
||||
"""Print deep research results (report + sources)."""
|
||||
def write(text):
|
||||
if output_file:
|
||||
output_file.write(text + "\n")
|
||||
else:
|
||||
print(text)
|
||||
|
||||
if not result["success"]:
|
||||
write(f"Error: {result.get('error', 'Unknown error')}")
|
||||
return
|
||||
|
||||
write(f"\n{'='*80}")
|
||||
query_display = result['query'][:100]
|
||||
if len(result['query']) > 100:
|
||||
query_display += "..."
|
||||
write(f"Research: {query_display}")
|
||||
write(f"Model: {result['model']} | Citations: {result.get('citation_count', 0)} | Time: {result['timestamp']}")
|
||||
write(f"{'='*80}\n")
|
||||
|
||||
write(result.get("response", result.get("output", "No output received.")))
|
||||
|
||||
citations = result.get("citations", result.get("sources", []))
|
||||
if citations:
|
||||
write(f"\n\n{'='*40} SOURCES {'='*40}")
|
||||
seen_urls = set()
|
||||
for cit in citations:
|
||||
url = cit.get("url", "")
|
||||
if url and url not in seen_urls:
|
||||
seen_urls.add(url)
|
||||
title = cit.get("title", "Untitled")
|
||||
write(f" [{len(seen_urls)}] {title}")
|
||||
write(f" {url}")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments and dispatch to the chosen command.

    Returns:
        Process exit code: 0 on success, 1 on error or missing command.
    """
    parser = argparse.ArgumentParser(
        description="Parallel Web Systems API Client - Search, Extract, and Deep Research",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python parallel_web.py search "latest advances in quantum computing"
  python parallel_web.py search "climate policy 2025" --model core
  python parallel_web.py extract "https://example.com" --objective "key findings"
  python parallel_web.py research "comprehensive analysis of EV battery market"
  python parallel_web.py research "compare mRNA vs protein subunit vaccines" --model base
  python parallel_web.py research "AI regulation landscape 2025" -o report.md
""",
    )

    subparsers = parser.add_subparsers(dest="command", help="API command")

    # --- search subcommand ---
    search_parser = subparsers.add_parser("search", help="Web search via Chat API (synthesized results)")
    search_parser.add_argument("objective", help="Natural language search objective")
    search_parser.add_argument("--model", default="base", choices=["base", "core"],
                               help="Chat model to use (default: base)")
    search_parser.add_argument("-o", "--output", help="Write output to file")
    search_parser.add_argument("--json", action="store_true", help="Output as JSON")

    # --- extract subcommand ---
    extract_parser = subparsers.add_parser("extract", help="Extract content from URLs (verification only)")
    extract_parser.add_argument("urls", nargs="+", help="One or more URLs to extract")
    extract_parser.add_argument("--objective", help="Objective to focus extraction")
    extract_parser.add_argument("--full-content", action="store_true", help="Return full page content")
    extract_parser.add_argument("-o", "--output", help="Write output to file")
    extract_parser.add_argument("--json", action="store_true", help="Output as JSON")

    # --- research subcommand ---
    research_parser = subparsers.add_parser("research", help="Deep research via Chat API (comprehensive report)")
    research_parser.add_argument("query", help="Research question or topic")
    research_parser.add_argument("--model", default="core", choices=["base", "core"],
                                 help="Chat model to use (default: core)")
    research_parser.add_argument("-o", "--output", help="Write output to file")
    research_parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return 1

    output_file = None
    if getattr(args, "output", None):
        output_file = open(args.output, "w", encoding="utf-8")

    def emit(result, printer):
        """Write *result* as pretty JSON when --json is set, else via *printer*.

        Centralizes the output path so JSON serialization and file-vs-stdout
        routing are defined once for all three subcommands.
        """
        if args.json:
            text = json.dumps(result, indent=2, ensure_ascii=False, default=str)
            (output_file or sys.stdout).write(text + "\n")
        else:
            printer(result, output_file)

    try:
        if args.command == "search":
            result = ParallelSearch().search(
                objective=args.objective,
                model=args.model,
            )
            emit(result, _print_search_results)

        elif args.command == "extract":
            result = ParallelExtract().extract(
                urls=args.urls,
                objective=args.objective,
                full_content=args.full_content,
            )
            emit(result, _print_extract_results)

        elif args.command == "research":
            result = ParallelDeepResearch().research(
                query=args.query,
                model=args.model,
            )
            emit(result, _print_research_results)

        return 0

    except Exception as e:
        # Top-level boundary: report the failure and exit nonzero instead of
        # surfacing a traceback to the CLI user.
        print(f"Error: {e}", file=sys.stderr)
        return 1

    finally:
        if output_file:
            output_file.close()
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return code as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user