Enhance citation management and literature review guidelines

- Updated SKILL.md in citation management to include best practices for identifying seminal and high-impact papers, emphasizing citation count thresholds, venue quality tiers, and author reputation indicators. - Expanded literature review SKILL.md to prioritize high-impact papers, detailing citation metrics, journal tiers, and author reputation assessment. - Added comprehensive evaluation strategies for paper impact and quality in literature_search_strategies.md, including citation count significance and journal impact factor guidance. - Improved research lookup scripts to prioritize results based on citation count, venue prestige, and author reputation, enhancing the quality of research outputs.
2026-01-26 16:58:56 +08:00 · 2026-01-05 13:01:10 -08:00
parent d243a12564
commit 3439a21f57
41 changed files with 11802 additions and 61 deletions
--- a/scientific-skills/research-lookup/scripts/lookup.py
+++ b/scientific-skills/research-lookup/scripts/lookup.py
@@ -21,6 +21,7 @@ def format_response(result: Dict) -> str:

    response = result["response"]
    citations = result["citations"]
+    sources = result.get("sources", [])

    # Format the output for Claude Code
    output = f"""🔍 **Research Results**
@@ -28,6 +29,7 @@ def format_response(result: Dict) -> str:
 **Query:** {result['query']}
 **Model:** {result['model']}
 **Timestamp:** {result['timestamp']}
+**Note:** Results prioritized by citation count, venue prestige, and author reputation

 ---

@@ -35,15 +37,48 @@ def format_response(result: Dict) -> str:

 """

+    # Display API-provided sources with venue/citation info
+    if sources:
+        output += f"\n📚 **Sources ({len(sources)}):**\n"
+        output += "_Prioritized by venue quality and citation impact_\n\n"
+        for i, source in enumerate(sources, 1):
+            title = source.get("title", "Untitled")
+            url = source.get("url", "")
+            date = source.get("date", "")
+            snippet = source.get("snippet", "")
+            
+            # Format source entry with available metadata
+            date_str = f" ({date})" if date else ""
+            output += f"{i}. **{title}**{date_str}\n"
+            
+            # Add venue indicator if detectable from URL
+            venue_indicator = _detect_venue_tier(url)
+            if venue_indicator:
+                output += f"   📊 Venue: {venue_indicator}\n"
+            
+            if url:
+                output += f"   🔗 {url}\n"
+            if snippet:
+                output += f"   _{snippet[:150]}{'...' if len(snippet) > 150 else ''}_\n"
+            output += "\n"
+
+    # Display extracted citations (DOIs, etc.)
    if citations:
-        output += f"\n**Extracted Citations ({len(citations)}):**\n"
-        for i, citation in enumerate(citations, 1):
-            if citation.get("doi"):
-                output += f"{i}. DOI: {citation['doi']}\n"
-            elif citation.get("authors") and citation.get("year"):
-                output += f"{i}. {citation['authors']} ({citation['year']})\n"
-            else:
-                output += f"{i}. {citation}\n"
+        doi_citations = [c for c in citations if c.get("type") == "doi"]
+        url_citations = [c for c in citations if c.get("type") == "url"]
+        
+        if doi_citations:
+            output += f"\n🔗 **DOI References ({len(doi_citations)}):**\n"
+            for i, citation in enumerate(doi_citations, 1):
+                output += f"{i}. DOI: {citation.get('doi', '')} → {citation.get('url', '')}\n"
+        
+        if url_citations:
+            output += f"\n🌐 **Additional URLs ({len(url_citations)}):**\n"
+            for i, citation in enumerate(url_citations, 1):
+                url = citation.get('url', '')
+                venue = _detect_venue_tier(url)
+                venue_str = f" [{venue}]" if venue else ""
+                output += f"{i}. {url}{venue_str}\n"

    if result.get("usage"):
        usage = result["usage"]
@@ -52,6 +87,65 @@ def format_response(result: Dict) -> str:
    return output


+def _detect_venue_tier(url: str) -> Optional[str]:
+    """Detect venue tier from URL to indicate source quality."""
+    if not url:
+        return None
+    
+    url_lower = url.lower()
+    
+    # Tier 1 - Premier venues
+    tier1_indicators = {
+        "nature.com": "Nature (Tier 1)",
+        "science.org": "Science (Tier 1)",
+        "cell.com": "Cell Press (Tier 1)",
+        "nejm.org": "NEJM (Tier 1)",
+        "thelancet.com": "Lancet (Tier 1)",
+        "jamanetwork.com": "JAMA (Tier 1)",
+        "pnas.org": "PNAS (Tier 1)",
+    }
+    
+    # Tier 2 - High-impact specialized
+    tier2_indicators = {
+        "neurips.cc": "NeurIPS (Tier 2 - Top ML)",
+        "icml.cc": "ICML (Tier 2 - Top ML)",
+        "openreview.net": "Top ML Conference (Tier 2)",
+        "aacrjournals.org": "AACR Journals (Tier 2)",
+        "ahajournals.org": "AHA Journals (Tier 2)",
+        "bloodjournal.org": "Blood (Tier 2)",
+        "jci.org": "JCI (Tier 2)",
+    }
+    
+    # Tier 3 - Respected academic sources
+    tier3_indicators = {
+        "springer.com": "Springer",
+        "wiley.com": "Wiley",
+        "elsevier.com": "Elsevier",
+        "oup.com": "Oxford University Press",
+        "arxiv.org": "arXiv (Preprint)",
+        "biorxiv.org": "bioRxiv (Preprint)",
+        "medrxiv.org": "medRxiv (Preprint)",
+        "pubmed": "PubMed",
+        "ncbi.nlm.nih.gov": "NCBI/PubMed",
+        "ieee.org": "IEEE",
+        "acm.org": "ACM",
+    }
+    
+    for domain, label in tier1_indicators.items():
+        if domain in url_lower:
+            return label
+    
+    for domain, label in tier2_indicators.items():
+        if domain in url_lower:
+            return label
+    
+    for domain, label in tier3_indicators.items():
+        if domain in url_lower:
+            return label
+    
+    return None
+
+
 def main():
    """Main entry point for Claude Code tool."""
    # Check for API key