mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
Add open-notebook skill: self-hosted NotebookLM alternative (issue #56)
Implements the open-notebook skill as a comprehensive integration for the open-source, self-hosted alternative to Google NotebookLM. Addresses the gap created by Google not providing a public NotebookLM API. Developed using TDD with 44 tests covering skill structure, SKILL.md frontmatter/content, reference documentation, example scripts, API endpoint coverage, and marketplace.json registration. Includes: - SKILL.md with full documentation, code examples, and provider matrix - references/api_reference.md covering all 20+ REST API endpoint groups - references/examples.md with complete research workflow examples - references/configuration.md with Docker, env vars, and security setup - references/architecture.md with system design and data flow diagrams - scripts/ with 3 example scripts (notebook, source, chat) + test suite - marketplace.json updated to register the new skill Closes #56 https://claude.ai/code/session_015CqcNWNYmDF9sqxKxziXcz
This commit is contained in:
190
scientific-skills/open-notebook/scripts/chat_interaction.py
Normal file
190
scientific-skills/open-notebook/scripts/chat_interaction.py
Normal file
@@ -0,0 +1,190 @@
|
||||
"""
|
||||
Open Notebook - Chat Interaction Example
|
||||
|
||||
Demonstrates creating chat sessions, sending messages with context,
|
||||
and searching across research materials.
|
||||
|
||||
Prerequisites:
|
||||
pip install requests
|
||||
|
||||
Usage:
|
||||
export OPEN_NOTEBOOK_URL="http://localhost:5055"
|
||||
python chat_interaction.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import requests
|
||||
|
||||
BASE_URL = os.getenv("OPEN_NOTEBOOK_URL", "http://localhost:5055") + "/api"
|
||||
|
||||
|
||||
def create_chat_session(notebook_id, title, model_override=None):
    """Open a new chat session inside the given notebook.

    Args:
        notebook_id: ID of the notebook the session belongs to.
        title: Human-readable session title.
        model_override: Optional model name to use instead of the default.

    Returns:
        The created session object as returned by the API.
    """
    body = {"notebook_id": notebook_id, "title": title}
    if model_override:
        body["model_override"] = model_override
    resp = requests.post(f"{BASE_URL}/chat/sessions", json=body)
    resp.raise_for_status()
    created = resp.json()
    print(f"Created chat session: {created['id']} - {title}")
    return created
|
||||
|
||||
|
||||
def list_chat_sessions(notebook_id):
    """Return every chat session attached to a notebook, printing a summary."""
    resp = requests.get(
        f"{BASE_URL}/chat/sessions", params={"notebook_id": notebook_id}
    )
    resp.raise_for_status()
    found = resp.json()
    print(f"Found {len(found)} chat session(s):")
    for item in found:
        print(f"  - {item['id']}: {item.get('title', 'Untitled')} "
              f"({item.get('message_count', 0)} messages)")
    return found
|
||||
|
||||
|
||||
def send_chat_message(session_id, message, include_sources=True,
                      include_notes=True, model_override=None):
    """Send one user message to a chat session and print the AI reply.

    Context from the notebook's sources and/or notes is attached according
    to the include_* flags.

    Args:
        session_id: Target chat session ID.
        message: The user message text.
        include_sources: Attach source context when True.
        include_notes: Attach note context when True.
        model_override: Optional model name to use for this message only.

    Returns:
        The API response payload (expected to contain a 'response' field).
    """
    body = {
        "session_id": session_id,
        "message": message,
        "context": {
            "include_sources": include_sources,
            "include_notes": include_notes,
        },
    }
    if model_override:
        body["model_override"] = model_override
    resp = requests.post(f"{BASE_URL}/chat/execute", json=body)
    resp.raise_for_status()
    answer = resp.json()
    print(f"\nUser: {message}")
    print(f"AI: {answer.get('response', answer)}")
    return answer
|
||||
|
||||
|
||||
def get_session_history(session_id):
    """Retrieve and print the full message history for a chat session.

    Args:
        session_id: Chat session to inspect.

    Returns:
        The full session object (messages are under the 'messages' key).
    """
    response = requests.get(f"{BASE_URL}/chat/sessions/{session_id}")
    response.raise_for_status()
    session = response.json()
    messages = session.get("messages", [])
    print(f"\n--- Session History ({len(messages)} messages) ---")
    for msg in messages:
        role = msg.get("role", "unknown")
        content = msg.get("content", "")
        # Only show the "..." truncation marker when the content was
        # actually cut; previously it was appended unconditionally, which
        # misleadingly suggested short messages were truncated too.
        preview = content[:200] + ("..." if len(content) > 200 else "")
        print(f"[{role}]: {preview}")
    return session
|
||||
|
||||
|
||||
def build_context(notebook_id, source_ids=None, note_ids=None):
    """Assemble (without chatting) the context the AI would receive.

    Useful for inspecting token/character counts before sending messages.

    Args:
        notebook_id: Notebook whose materials form the context.
        source_ids: Optional list restricting which sources are included.
        note_ids: Optional list restricting which notes are included.

    Returns:
        The context payload from the API.
    """
    body = {"notebook_id": notebook_id}
    if source_ids:
        body["source_ids"] = source_ids
    if note_ids:
        body["note_ids"] = note_ids
    resp = requests.post(f"{BASE_URL}/chat/context", json=body)
    resp.raise_for_status()
    ctx = resp.json()
    print(f"Context built: {ctx.get('token_count', '?')} tokens, "
          f"{ctx.get('char_count', '?')} characters")
    return ctx
|
||||
|
||||
|
||||
def search_knowledge_base(query, search_type="vector", limit=5):
    """Search all materials in the knowledge base and print the hits.

    Args:
        query: Search query text.
        search_type: Passed through to the API (default "vector").
        limit: Maximum number of hits to request.

    Returns:
        The search response payload (hits under the 'results' key).
    """
    resp = requests.post(f"{BASE_URL}/search", json={
        "query": query,
        "search_type": search_type,
        "limit": limit,
    })
    resp.raise_for_status()
    payload = resp.json()
    print(f"\nSearch results for '{query}' ({payload.get('total', 0)} hits):")
    for hit in payload.get("results", []):
        name = hit.get("title", "Untitled")
        score = hit.get("similarity", "N/A")
        print(f"  - {name} (similarity: {score})")
    return payload
|
||||
|
||||
|
||||
def ask_question(query):
    """Ask a one-off question answered from the whole knowledge base.

    Args:
        query: The question text.

    Returns:
        The API answer payload (expected to contain a 'response' field).
    """
    resp = requests.post(f"{BASE_URL}/search/ask/simple", json={"query": query})
    resp.raise_for_status()
    answer = resp.json()
    print(f"\nQ: {query}")
    print(f"A: {answer.get('response', answer)}")
    return answer
|
||||
|
||||
|
||||
def delete_chat_session(session_id):
    """Remove a chat session permanently."""
    resp = requests.delete(f"{BASE_URL}/chat/sessions/{session_id}")
    resp.raise_for_status()
    print(f"Deleted chat session: {session_id}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    print("=== Chat Interaction Demo ===\n")

    # Create a notebook with some content first. Check the HTTP status
    # before decoding so a server error surfaces as a clear HTTPError
    # instead of a confusing JSON decode error or KeyError on "id".
    notebook_resp = requests.post(f"{BASE_URL}/notebooks", json={
        "name": "Chat Demo",
        "description": "Demonstrating chat interactions",
    })
    notebook_resp.raise_for_status()
    notebook = notebook_resp.json()
    notebook_id = notebook["id"]

    # Add a text source for context (processed synchronously so it is
    # available before the first chat message).
    source_resp = requests.post(f"{BASE_URL}/sources", data={
        "text": (
            "Immunotherapy has revolutionized cancer treatment. "
            "Checkpoint inhibitors targeting PD-1 and PD-L1 have shown "
            "remarkable efficacy in non-small cell lung cancer, melanoma, "
            "and several other tumor types. Tumor mutational burden (TMB) "
            "has emerged as a key biomarker for predicting response to "
            "immunotherapy. Patients with high TMB tend to generate more "
            "neoantigens, making their tumors more visible to the immune system."
        ),
        "notebook_id": notebook_id,
        "process_async": "false",
    })
    source_resp.raise_for_status()

    # Create a chat session
    session = create_chat_session(notebook_id, "Immunotherapy Discussion")

    # Have a conversation
    print()
    send_chat_message(
        session["id"],
        "What are the main biomarkers for immunotherapy response?",
    )

    send_chat_message(
        session["id"],
        "How does TMB relate to neoantigen load?",
    )

    # View conversation history
    get_session_history(session["id"])

    # Search the knowledge base
    search_knowledge_base("checkpoint inhibitor efficacy")

    # Ask a standalone question
    ask_question("What is the role of PD-L1 in cancer immunotherapy?")

    # Clean up (deleting the notebook also removes the demo content)
    print()
    delete_chat_session(session["id"])
    requests.delete(f"{BASE_URL}/notebooks/{notebook_id}")
    print("Cleanup complete")
|
||||
142
scientific-skills/open-notebook/scripts/notebook_management.py
Normal file
142
scientific-skills/open-notebook/scripts/notebook_management.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""
|
||||
Open Notebook - Notebook Management Example
|
||||
|
||||
Demonstrates creating, listing, updating, and deleting notebooks
|
||||
using the Open Notebook REST API.
|
||||
|
||||
Prerequisites:
|
||||
pip install requests
|
||||
|
||||
Usage:
|
||||
export OPEN_NOTEBOOK_URL="http://localhost:5055"
|
||||
python notebook_management.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import requests
|
||||
|
||||
BASE_URL = os.getenv("OPEN_NOTEBOOK_URL", "http://localhost:5055") + "/api"
|
||||
|
||||
|
||||
def create_notebook(name, description=""):
    """Create a notebook and return the API's representation of it.

    Args:
        name: Notebook display name.
        description: Optional notebook description.

    Returns:
        The created notebook object.
    """
    resp = requests.post(f"{BASE_URL}/notebooks", json={
        "name": name,
        "description": description,
    })
    resp.raise_for_status()
    created = resp.json()
    print(f"Created notebook: {created['id']} - {created['name']}")
    return created
|
||||
|
||||
|
||||
def list_notebooks(archived=False):
    """List notebooks filtered by archived status and print a summary.

    Args:
        archived: When True, list archived notebooks; otherwise active ones.

    Returns:
        The list of notebook objects from the API.
    """
    # Serialize the flag as "true"/"false" explicitly: requests would
    # otherwise send Python's str(bool) ("True"/"False"), which differs
    # from the lowercase convention these scripts use everywhere else
    # (e.g. process_async) and may not parse as a boolean server-side.
    response = requests.get(f"{BASE_URL}/notebooks", params={
        "archived": str(archived).lower(),
    })
    response.raise_for_status()
    notebooks = response.json()
    print(f"Found {len(notebooks)} notebook(s):")
    for nb in notebooks:
        print(f"  - {nb['id']}: {nb['name']} "
              f"(sources: {nb.get('source_count', 0)}, "
              f"notes: {nb.get('note_count', 0)})")
    return notebooks
|
||||
|
||||
|
||||
def get_notebook(notebook_id):
    """Fetch one notebook by its ID."""
    resp = requests.get(f"{BASE_URL}/notebooks/{notebook_id}")
    resp.raise_for_status()
    return resp.json()
|
||||
|
||||
|
||||
def update_notebook(notebook_id, name=None, description=None, archived=None):
    """Update any subset of a notebook's name/description/archived fields.

    Only fields explicitly passed (i.e. not None) are sent, so unspecified
    fields keep their current server-side values.

    Args:
        notebook_id: Notebook to update.
        name: New name, or None to leave unchanged.
        description: New description, or None to leave unchanged.
        archived: New archived flag, or None to leave unchanged.

    Returns:
        The updated notebook object.
    """
    changes = {
        field: value
        for field, value in (
            ("name", name),
            ("description", description),
            ("archived", archived),
        )
        if value is not None
    }
    resp = requests.put(
        f"{BASE_URL}/notebooks/{notebook_id}", json=changes
    )
    resp.raise_for_status()
    updated = resp.json()
    print(f"Updated notebook: {updated['id']} - {updated['name']}")
    return updated
|
||||
|
||||
|
||||
def delete_notebook(notebook_id, delete_sources=False):
    """Delete a notebook, printing a preview of what will be removed.

    Args:
        notebook_id: ID of the notebook to delete.
        delete_sources: Also delete sources exclusive to this notebook.
    """
    # Preview what will be deleted. Check the status before .json() so a
    # failed preview raises a clear HTTPError instead of a decode error.
    preview_resp = requests.get(
        f"{BASE_URL}/notebooks/{notebook_id}/delete-preview"
    )
    preview_resp.raise_for_status()
    preview = preview_resp.json()
    print(f"Deletion will affect {preview.get('note_count', 0)} notes "
          f"and {preview.get('source_count', 0)} sources")

    response = requests.delete(
        f"{BASE_URL}/notebooks/{notebook_id}",
        # Lowercase "true"/"false" for consistency with the other boolean
        # query/form values in these scripts (requests would otherwise
        # send "True"/"False").
        params={"delete_sources": str(delete_sources).lower()},
    )
    response.raise_for_status()
    print(f"Deleted notebook: {notebook_id}")
|
||||
|
||||
|
||||
def link_source_to_notebook(notebook_id, source_id):
    """Attach an existing source to a notebook."""
    resp = requests.post(
        f"{BASE_URL}/notebooks/{notebook_id}/sources/{source_id}"
    )
    resp.raise_for_status()
    print(f"Linked source {source_id} to notebook {notebook_id}")
|
||||
|
||||
|
||||
def unlink_source_from_notebook(notebook_id, source_id):
    """Detach a source from a notebook (the source itself is kept)."""
    resp = requests.delete(
        f"{BASE_URL}/notebooks/{notebook_id}/sources/{source_id}"
    )
    resp.raise_for_status()
    print(f"Unlinked source {source_id} from notebook {notebook_id}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo workflow: create, list, update, archive, and delete notebooks.
    print("=== Notebook Management Demo ===\n")

    # Create two notebooks to work with.
    folding_nb = create_notebook(
        "Protein Folding Research",
        "Literature review on AlphaFold and related methods"
    )
    crispr_nb = create_notebook(
        "CRISPR Gene Editing",
        "Survey of CRISPR-Cas9 applications in therapeutics"
    )

    # Show everything that now exists.
    print()
    list_notebooks()

    # Edit the first notebook's description.
    print()
    update_notebook(folding_nb["id"], description="Updated: Including ESMFold comparisons")

    # Archive the second notebook, then list active vs archived.
    print()
    update_notebook(crispr_nb["id"], archived=True)
    print("\nActive notebooks:")
    list_notebooks(archived=False)

    print("\nArchived notebooks:")
    list_notebooks(archived=True)

    # Remove both demo notebooks.
    print()
    delete_notebook(folding_nb["id"])
    delete_notebook(crispr_nb["id"])
|
||||
160
scientific-skills/open-notebook/scripts/source_ingestion.py
Normal file
160
scientific-skills/open-notebook/scripts/source_ingestion.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
Open Notebook - Source Ingestion Example
|
||||
|
||||
Demonstrates ingesting various content types (URLs, files, text) into
|
||||
Open Notebook and monitoring processing status.
|
||||
|
||||
Prerequisites:
|
||||
pip install requests
|
||||
|
||||
Usage:
|
||||
export OPEN_NOTEBOOK_URL="http://localhost:5055"
|
||||
python source_ingestion.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import requests
|
||||
|
||||
BASE_URL = os.getenv("OPEN_NOTEBOOK_URL", "http://localhost:5055") + "/api"
|
||||
|
||||
|
||||
def add_url_source(notebook_id, url, process_async=True):
    """Register a web URL as a notebook source.

    Args:
        notebook_id: Notebook to attach the source to.
        url: Web page to ingest.
        process_async: Process in the background when True.

    Returns:
        The created source object.
    """
    form = {
        "url": url,
        "notebook_id": notebook_id,
        "process_async": str(process_async).lower(),
    }
    resp = requests.post(f"{BASE_URL}/sources", data=form)
    resp.raise_for_status()
    created = resp.json()
    print(f"Added URL source: {created['id']} - {url}")
    return created
|
||||
|
||||
|
||||
def add_text_source(notebook_id, title, text):
    """Add raw text as a source (processed synchronously).

    Args:
        notebook_id: Notebook to attach the source to.
        title: Title for the source.
        text: The raw text content.

    Returns:
        The created source object.
    """
    response = requests.post(f"{BASE_URL}/sources", data={
        "text": text,
        # `title` was previously only printed, never sent, leaving the
        # source untitled server-side. NOTE(review): assumes the sources
        # endpoint accepts a "title" form field — confirm against the
        # Open Notebook API reference.
        "title": title,
        "notebook_id": notebook_id,
        "process_async": "false",
    })
    response.raise_for_status()
    source = response.json()
    print(f"Added text source: {source['id']} - {title}")
    return source
|
||||
|
||||
|
||||
def upload_file_source(notebook_id, file_path, process_async=True):
    """Upload a file (PDF, DOCX, audio, video) as a source.

    Args:
        notebook_id: Notebook to attach the uploaded file to.
        file_path: Path to the local file to upload.
        process_async: Process in the background when True.

    Returns:
        The created source object.
    """
    filename = os.path.basename(file_path)
    with open(file_path, "rb") as f:
        response = requests.post(
            f"{BASE_URL}/sources",
            data={
                "notebook_id": notebook_id,
                "process_async": str(process_async).lower(),
            },
            files={"file": (filename, f)},
        )
    response.raise_for_status()
    source = response.json()
    # Report the uploaded filename (the message previously contained an
    # "(unknown)" placeholder although `filename` was already computed).
    print(f"Uploaded file source: {source['id']} - {filename}")
    return source
|
||||
|
||||
|
||||
def wait_for_processing(source_id, poll_interval=5, timeout=300):
    """Poll a source's processing status until it finishes or times out.

    Args:
        source_id: Source to monitor.
        poll_interval: Seconds to sleep between status checks.
        timeout: Maximum seconds to wait overall.

    Returns:
        The final status payload when processing completed or failed,
        or None if the timeout elapsed first.
    """
    # Use a monotonic-clock deadline so time spent inside the HTTP
    # request counts toward the timeout; the previous counter only added
    # the sleep interval, allowing the real wait to exceed `timeout`
    # when requests were slow.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        response = requests.get(f"{BASE_URL}/sources/{source_id}/status")
        response.raise_for_status()
        status = response.json()
        current_status = status.get("status", "unknown")
        print(f"  Source {source_id}: {current_status}")

        # Terminal states end the poll loop either way.
        if current_status in ("completed", "failed"):
            return status
        time.sleep(poll_interval)

    print(f"  Source {source_id}: timed out after {timeout}s")
    return None
|
||||
|
||||
|
||||
def list_sources(notebook_id=None, limit=20):
    """List sources (optionally scoped to one notebook) and print them.

    Args:
        notebook_id: Restrict results to this notebook when given.
        limit: Maximum number of sources to request.

    Returns:
        The list of source objects from the API.
    """
    query = {"limit": limit}
    if notebook_id:
        query["notebook_id"] = notebook_id
    resp = requests.get(f"{BASE_URL}/sources", params=query)
    resp.raise_for_status()
    found = resp.json()
    print(f"Found {len(found)} source(s):")
    for entry in found:
        print(f"  - {entry['id']}: {entry.get('title', 'Untitled')}")
    return found
|
||||
|
||||
|
||||
def get_source_insights(source_id):
    """Return the AI-generated insights stored for a source."""
    resp = requests.get(f"{BASE_URL}/sources/{source_id}/insights")
    resp.raise_for_status()
    return resp.json()
|
||||
|
||||
|
||||
def retry_failed_source(source_id):
    """Ask the server to reprocess a source whose ingestion failed."""
    resp = requests.post(f"{BASE_URL}/sources/{source_id}/retry")
    resp.raise_for_status()
    print(f"Retrying source: {source_id}")
    return resp.json()
|
||||
|
||||
|
||||
def delete_source(source_id):
    """Delete a source permanently."""
    resp = requests.delete(f"{BASE_URL}/sources/{source_id}")
    resp.raise_for_status()
    print(f"Deleted source: {source_id}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    print("=== Source Ingestion Demo ===\n")

    # Create a notebook first. Check the HTTP status before decoding so a
    # server error raises a clear HTTPError instead of failing on ["id"].
    notebook_resp = requests.post(f"{BASE_URL}/notebooks", json={
        "name": "Source Ingestion Demo",
        "description": "Testing various source types",
    })
    notebook_resp.raise_for_status()
    notebook = notebook_resp.json()
    notebook_id = notebook["id"]
    print(f"Created notebook: {notebook_id}\n")

    # Add a URL source
    url_source = add_url_source(
        notebook_id,
        "https://en.wikipedia.org/wiki/CRISPR_gene_editing",
    )

    # Add a text source
    text_source = add_text_source(
        notebook_id,
        "Research Notes",
        "CRISPR-Cas9 is a genome editing tool that allows researchers to "
        "alter DNA sequences and modify gene function. It has transformed "
        "biological research and offers potential for treating genetic diseases.",
    )

    # Wait for async processing (only the URL source is asynchronous;
    # the text source was processed synchronously above).
    print("\nWaiting for processing...")
    wait_for_processing(url_source["id"])

    # List all sources in the notebook
    print()
    list_sources(notebook_id)

    # Clean up
    print()
    delete_source(url_source["id"])
    delete_source(text_source["id"])
    requests.delete(f"{BASE_URL}/notebooks/{notebook_id}")
    print("Cleanup complete")
|
||||
@@ -0,0 +1,423 @@
|
||||
"""
|
||||
Test-Driven Development tests for the Open-Notebook skill.
|
||||
|
||||
These tests validate the structure, content completeness, and correctness
|
||||
of the open-notebook skill implementation for the claude-scientific-skills repository.
|
||||
|
||||
Run with: python -m pytest test_open_notebook_skill.py -v
|
||||
Or: python -m unittest test_open_notebook_skill.py -v
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import unittest
|
||||
|
||||
# Resolve paths relative to this test file
|
||||
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
SKILL_DIR = os.path.dirname(SCRIPT_DIR)
|
||||
REPO_ROOT = os.path.dirname(os.path.dirname(SKILL_DIR))
|
||||
REFERENCES_DIR = os.path.join(SKILL_DIR, "references")
|
||||
SCRIPTS_DIR = SCRIPT_DIR
|
||||
SKILL_MD = os.path.join(SKILL_DIR, "SKILL.md")
|
||||
MARKETPLACE_JSON = os.path.join(REPO_ROOT, ".claude-plugin", "marketplace.json")
|
||||
|
||||
|
||||
class TestSkillDirectoryStructure(unittest.TestCase):
    """Tests that the skill directory has the required structure."""

    def test_skill_directory_exists(self):
        """The open-notebook skill directory must exist."""
        self.assertTrue(os.path.isdir(SKILL_DIR),
                        f"Skill directory does not exist: {SKILL_DIR}")

    def test_skill_md_exists(self):
        """SKILL.md must exist in the skill directory."""
        self.assertTrue(os.path.isfile(SKILL_MD),
                        f"SKILL.md does not exist: {SKILL_MD}")

    def test_references_directory_exists(self):
        """A references/ directory must exist."""
        self.assertTrue(os.path.isdir(REFERENCES_DIR),
                        f"References directory does not exist: {REFERENCES_DIR}")

    def test_scripts_directory_exists(self):
        """A scripts/ directory must exist."""
        self.assertTrue(os.path.isdir(SCRIPTS_DIR),
                        f"Scripts directory does not exist: {SCRIPTS_DIR}")
|
||||
|
||||
|
||||
class TestSkillMdFrontmatter(unittest.TestCase):
    """Tests that SKILL.md has correct YAML frontmatter."""

    @classmethod
    def setUpClass(cls):
        with open(SKILL_MD, "r") as f:
            cls.content = f.read()
        # Pull out the YAML block between the leading --- delimiters;
        # empty string when no frontmatter is present.
        found = re.match(r"^---\n(.*?)\n---", cls.content, re.DOTALL)
        cls.frontmatter = found.group(1) if found else ""

    def test_has_yaml_frontmatter(self):
        """SKILL.md must start with YAML frontmatter delimiters."""
        self.assertTrue(self.content.startswith("---\n"),
                        "SKILL.md must start with '---' YAML frontmatter delimiter")
        self.assertIn("\n---\n", self.content[4:],
                      "SKILL.md must have a closing '---' YAML frontmatter delimiter")

    def test_frontmatter_has_name(self):
        """Frontmatter must include a 'name' field set to 'open-notebook'."""
        self.assertIn("name:", self.frontmatter)
        self.assertRegex(self.frontmatter, r"name:\s*open-notebook")

    def test_frontmatter_has_description(self):
        """Frontmatter must include a 'description' field."""
        self.assertIn("description:", self.frontmatter)
        # Description should be substantive (at least 50 characters).
        hit = re.search(r"description:\s*(.+)", self.frontmatter)
        self.assertIsNotNone(hit, "description field must have content")
        self.assertGreater(len(hit.group(1).strip()), 50,
                           "description must be substantive (>50 chars)")

    def test_frontmatter_has_license(self):
        """Frontmatter must include a 'license' field."""
        self.assertIn("license:", self.frontmatter)
        self.assertRegex(self.frontmatter, r"license:\s*MIT")

    def test_frontmatter_has_metadata_author(self):
        """Frontmatter must include metadata with skill-author."""
        self.assertIn("metadata:", self.frontmatter)
        self.assertIn("skill-author:", self.frontmatter)
        self.assertRegex(self.frontmatter, r"skill-author:\s*K-Dense Inc\.")
|
||||
|
||||
|
||||
class TestSkillMdContent(unittest.TestCase):
    """Tests that SKILL.md has required content sections."""

    @classmethod
    def setUpClass(cls):
        with open(SKILL_MD, "r") as f:
            cls.content = f.read()

    def test_has_title_heading(self):
        """SKILL.md must have an H1 title heading."""
        self.assertIsNotNone(
            re.search(r"^# .+", self.content, flags=re.MULTILINE),
            "SKILL.md must have an H1 title heading")

    def test_has_overview_section(self):
        """SKILL.md must have an Overview section."""
        self.assertRegex(self.content, r"## Overview",
                         "Must include an Overview section")

    def test_has_quick_start_section(self):
        """SKILL.md must have a Quick Start section."""
        self.assertRegex(self.content, r"## Quick Start",
                         "Must include a Quick Start section")

    def test_has_docker_setup(self):
        """SKILL.md must include Docker setup instructions."""
        lowered = self.content.lower()
        self.assertIn("docker", lowered)
        self.assertIn("docker-compose", lowered)

    def test_has_api_base_url(self):
        """SKILL.md must mention the API base URL."""
        self.assertIn("localhost:5055", self.content)

    def test_mentions_notebooklm_alternative(self):
        """SKILL.md must explain open-notebook as a NotebookLM alternative."""
        lowered = self.content.lower()
        self.assertTrue(
            "notebooklm" in lowered or "notebook lm" in lowered,
            "Must mention NotebookLM as context for why open-notebook exists")

    def test_mentions_self_hosted(self):
        """SKILL.md must highlight the self-hosted/privacy aspect."""
        lowered = self.content.lower()
        self.assertTrue(
            "self-hosted" in lowered or "privacy" in lowered,
            "Must highlight self-hosted/privacy benefits")

    def test_mentions_multiple_ai_providers(self):
        """SKILL.md must mention support for multiple AI providers."""
        lowered = self.content.lower()
        known = ("openai", "anthropic", "google", "ollama", "groq", "mistral")
        hits = sum(1 for provider in known if provider in lowered)
        self.assertGreaterEqual(hits, 4, "Must mention at least 4 AI providers")

    def test_has_core_features_section(self):
        """SKILL.md must describe core features."""
        lowered = self.content.lower()
        wanted = ("notebook", "source", "note", "chat", "podcast", "search")
        found = sum(1 for feature in wanted if feature in lowered)
        self.assertGreaterEqual(
            found, 5,
            "Must describe at least 5 core features (notebook, source, note, chat, podcast, search)")

    def test_has_api_reference_section(self):
        """SKILL.md must have an API section or reference to API docs."""
        lowered = self.content.lower()
        self.assertTrue("api" in lowered and "reference" in lowered,
                        "Must include API reference information")

    def test_has_python_code_examples(self):
        """SKILL.md must contain Python code examples."""
        self.assertIn("```python", self.content)

    def test_has_bash_code_examples(self):
        """SKILL.md must contain bash/shell code examples."""
        self.assertTrue("```bash" in self.content or "```shell" in self.content,
                        "Must contain bash or shell code blocks")

    def test_has_installation_instructions(self):
        """SKILL.md must include installation instructions."""
        lowered = self.content.lower()
        self.assertTrue("install" in lowered or "setup" in lowered,
                        "Must include installation/setup instructions")

    def test_has_environment_variable_info(self):
        """SKILL.md must mention the encryption key environment variable."""
        self.assertIn("OPEN_NOTEBOOK_ENCRYPTION_KEY", self.content,
                      "Must mention the required encryption key env var")

    def test_has_kdense_suggestion(self):
        """SKILL.md must include the K-Dense Web suggestion section."""
        self.assertIn("K-Dense Web", self.content,
                      "Must include K-Dense Web suggestion")

    def test_content_length_sufficient(self):
        """SKILL.md must be substantive (at least 5000 characters)."""
        self.assertGreater(
            len(self.content), 5000,
            "SKILL.md must be at least 5000 characters for a comprehensive skill")
|
||||
|
||||
|
||||
class TestReferenceFiles(unittest.TestCase):
    """Tests that reference documentation files exist and have sufficient content."""

    def _read_reference(self, filename):
        """Assert references/<filename> exists and return its text."""
        path = os.path.join(REFERENCES_DIR, filename)
        # Name the actual missing path in the failure message (it was an
        # "(unknown)" placeholder before, which made failures undiagnosable).
        self.assertTrue(
            os.path.isfile(path),
            f"Reference file must exist: {path}",
        )
        with open(path, "r") as f:
            content = f.read()
        return content

    def test_api_reference_exists_and_comprehensive(self):
        """references/api_reference.md must exist and cover key API endpoints."""
        content = self._read_reference("api_reference.md")
        self.assertGreater(len(content), 3000, "API reference must be comprehensive")
        # Must cover core endpoint groups
        for endpoint_group in ["notebooks", "sources", "notes", "chat", "search"]:
            self.assertIn(
                endpoint_group,
                content.lower(),
                f"API reference must cover {endpoint_group} endpoints",
            )

    def test_api_reference_has_http_methods(self):
        """API reference must document HTTP methods."""
        content = self._read_reference("api_reference.md")
        for method in ["GET", "POST", "PUT", "DELETE"]:
            self.assertIn(
                method,
                content,
                f"API reference must document {method} method",
            )

    def test_examples_reference_exists(self):
        """references/examples.md must exist with practical code examples."""
        content = self._read_reference("examples.md")
        self.assertGreater(len(content), 2000, "Examples must be substantive")
        self.assertIn("```python", content, "Examples must include Python code")

    def test_configuration_reference_exists(self):
        """references/configuration.md must exist with setup details."""
        content = self._read_reference("configuration.md")
        self.assertGreater(len(content), 1500, "Configuration guide must be substantive")
        content_lower = content.lower()
        self.assertTrue(
            "docker" in content_lower,
            "Configuration must cover Docker setup",
        )
        self.assertTrue(
            "environment" in content_lower or "env" in content_lower,
            "Configuration must cover environment variables",
        )

    def test_architecture_reference_exists(self):
        """references/architecture.md must exist explaining the system."""
        content = self._read_reference("architecture.md")
        self.assertGreater(len(content), 1000, "Architecture doc must be substantive")
        content_lower = content.lower()
        for component in ["fastapi", "surrealdb", "langchain"]:
            self.assertIn(
                component,
                content_lower,
                f"Architecture must mention {component}",
            )
|
||||
|
||||
|
||||
class TestExampleScripts(unittest.TestCase):
    """Tests that example scripts exist and are valid Python."""

    def _check_script(self, filename):
        """Assert scripts/<filename> exists and compiles; return its text."""
        path = os.path.join(SCRIPTS_DIR, filename)
        # Name the actual path/file in failure messages (they previously
        # contained "(unknown)" placeholders, hiding which script failed).
        self.assertTrue(
            os.path.isfile(path),
            f"Script must exist: {path}",
        )
        with open(path, "r") as f:
            content = f.read()
        # Verify it's valid Python syntax
        try:
            compile(content, filename, "exec")
        except SyntaxError as e:
            self.fail(f"Script {filename} has invalid Python syntax: {e}")
        return content

    def test_notebook_management_script_exists(self):
        """A notebook management example script must exist."""
        content = self._check_script("notebook_management.py")
        self.assertIn("notebook", content.lower())
        self.assertIn("requests", content.lower())

    def test_source_ingestion_script_exists(self):
        """A source ingestion example script must exist."""
        content = self._check_script("source_ingestion.py")
        self.assertIn("source", content.lower())

    def test_chat_interaction_script_exists(self):
        """A chat interaction example script must exist."""
        content = self._check_script("chat_interaction.py")
        self.assertIn("chat", content.lower())
|
||||
|
||||
|
||||
class TestMarketplaceJson(unittest.TestCase):
    """Tests that marketplace.json includes the open-notebook skill."""

    @classmethod
    def setUpClass(cls):
        with open(MARKETPLACE_JSON, "r") as f:
            cls.marketplace = json.load(f)

    def test_marketplace_has_open_notebook_skill(self):
        """marketplace.json must list the open-notebook skill."""
        skill_path = "./scientific-skills/open-notebook"
        registered = self.marketplace["plugins"][0]["skills"]
        self.assertIn(
            skill_path, registered,
            f"marketplace.json must include '{skill_path}' in the skills list",
        )

    def test_marketplace_valid_json(self):
        """marketplace.json must be valid JSON with expected structure."""
        self.assertIn("plugins", self.marketplace)
        plugins = self.marketplace["plugins"]
        self.assertIsInstance(plugins, list)
        self.assertGreater(len(plugins), 0)
        self.assertIn("skills", plugins[0])
|
||||
|
||||
|
||||
class TestSkillMdApiEndpointCoverage(unittest.TestCase):
    """Tests that SKILL.md or reference docs cover key API endpoint categories."""

    @classmethod
    def setUpClass(cls):
        with open(SKILL_MD, "r") as f:
            cls.skill_content = f.read()
        api_ref_path = os.path.join(REFERENCES_DIR, "api_reference.md")
        with open(api_ref_path, "r") as f:
            cls.api_content = f.read()
        # Combined text retained for any check spanning both documents.
        cls.combined = cls.skill_content + cls.api_content

    def _covered(self, route):
        # All endpoint-coverage checks inspect the API reference document.
        self.assertIn(route, self.api_content)

    def test_covers_notebook_endpoints(self):
        """Must document notebook management endpoints."""
        self._covered("/notebooks")

    def test_covers_source_endpoints(self):
        """Must document source management endpoints."""
        self._covered("/sources")

    def test_covers_note_endpoints(self):
        """Must document note management endpoints."""
        self._covered("/notes")

    def test_covers_chat_endpoints(self):
        """Must document chat endpoints."""
        self._covered("/chat")

    def test_covers_search_endpoints(self):
        """Must document search endpoints."""
        self._covered("/search")

    def test_covers_podcast_endpoints(self):
        """Must document podcast endpoints."""
        self._covered("/podcasts")

    def test_covers_transformation_endpoints(self):
        """Must document transformation endpoints."""
        self._covered("/transformations")

    def test_covers_model_management(self):
        """Must document model management endpoints."""
        self._covered("/models")

    def test_covers_credential_management(self):
        """Must document credential management endpoints."""
        self._covered("/credentials")
|
||||
|
||||
|
||||
# Allow running this file directly (python test_open_notebook_skill.py)
# in addition to pytest/unittest discovery.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user