Add USPTO and OpenTargets databases

This commit is contained in:
Timothy Kassis
2025-10-20 22:34:37 -07:00
parent 6e10c5e8da
commit 16143f1167
15 changed files with 4365 additions and 2 deletions

View File

@@ -7,7 +7,7 @@
}, },
"metadata": { "metadata": {
"description": "Claude scientific skills from K-Dense Inc", "description": "Claude scientific skills from K-Dense Inc",
"version": "1.30.0" "version": "1.32.0"
}, },
"plugins": [ "plugins": [
{ {
@@ -79,12 +79,14 @@
"./scientific-databases/hmdb-database", "./scientific-databases/hmdb-database",
"./scientific-databases/kegg-database", "./scientific-databases/kegg-database",
"./scientific-databases/metabolomics-workbench-database", "./scientific-databases/metabolomics-workbench-database",
"./scientific-databases/opentargets-database",
"./scientific-databases/pdb-database", "./scientific-databases/pdb-database",
"./scientific-databases/pubchem-database", "./scientific-databases/pubchem-database",
"./scientific-databases/pubmed-database", "./scientific-databases/pubmed-database",
"./scientific-databases/reactome-database", "./scientific-databases/reactome-database",
"./scientific-databases/string-database", "./scientific-databases/string-database",
"./scientific-databases/uniprot-database", "./scientific-databases/uniprot-database",
"./scientific-databases/uspto-database",
"./scientific-databases/zinc-database" "./scientific-databases/zinc-database"
] ]
}, },

View File

@@ -2,7 +2,7 @@
A comprehensive collection of ready-to-use scientific skills for Claude, curated by the K-Dense team. These skills enable Claude to work with specialized scientific libraries and databases across bioinformatics, cheminformatics, machine learning, materials science, and data analysis. Using this set of skills with Claude Code allows you to create an 'AI Scientist' on your desktop! If you want substantially more advanced capabilities, compute infrastructure, and an enterprise-ready offering, check out https://k-dense.ai/. A comprehensive collection of ready-to-use scientific skills for Claude, curated by the K-Dense team. These skills enable Claude to work with specialized scientific libraries and databases across bioinformatics, cheminformatics, machine learning, materials science, and data analysis. Using this set of skills with Claude Code allows you to create an 'AI Scientist' on your desktop! If you want substantially more advanced capabilities, compute infrastructure, and an enterprise-ready offering, check out https://k-dense.ai/.
This repository provides access to **22 scientific databases**, **40 scientific packages**, **5 scientific integrations**, and **122 documented workflows** covering a wide range of scientific computing tasks. This repository provides access to **24 scientific databases**, **40 scientific packages**, **5 scientific integrations**, and **122 documented workflows** covering a wide range of scientific computing tasks.
## Getting Started ## Getting Started
@@ -40,6 +40,7 @@ After installing the plugin, you can use the skill by just mentioning it. Additi
- **HMDB (Human Metabolome Database)** - Comprehensive metabolomics resource with 220K+ metabolite entries, detailed chemical/biological data, concentration ranges, disease associations, pathways, and spectral data for metabolite identification and biomarker discovery - **HMDB (Human Metabolome Database)** - Comprehensive metabolomics resource with 220K+ metabolite entries, detailed chemical/biological data, concentration ranges, disease associations, pathways, and spectral data for metabolite identification and biomarker discovery
- **KEGG** - Kyoto Encyclopedia of Genes and Genomes for biological pathway analysis, gene-to-pathway mapping, compound searches, and molecular interaction networks (pathway enrichment, metabolic pathways, gene annotations, drug-drug interactions, ID conversion) - **KEGG** - Kyoto Encyclopedia of Genes and Genomes for biological pathway analysis, gene-to-pathway mapping, compound searches, and molecular interaction networks (pathway enrichment, metabolic pathways, gene annotations, drug-drug interactions, ID conversion)
- **Metabolomics Workbench** - NIH Common Fund metabolomics data repository with 4,200+ processed studies, standardized nomenclature (RefMet), mass spectrometry searches, and comprehensive REST API for accessing metabolite structures, study metadata, experimental results, and gene/protein-metabolite associations - **Metabolomics Workbench** - NIH Common Fund metabolomics data repository with 4,200+ processed studies, standardized nomenclature (RefMet), mass spectrometry searches, and comprehensive REST API for accessing metabolite structures, study metadata, experimental results, and gene/protein-metabolite associations
- **Open Targets** - Comprehensive therapeutic target identification and validation platform integrating genetics, omics, and chemical data (200M+ evidence strings, target-disease associations with scoring, tractability assessments, safety liabilities, known drugs from ChEMBL, GraphQL API) for drug target discovery, prioritization, evidence evaluation, drug repurposing, competitive intelligence, and mechanism research
- **NCBI Gene** - Work with NCBI Gene database to search, retrieve, and analyze gene information including nomenclature, sequences, variations, phenotypes, and pathways using E-utilities and Datasets API - **NCBI Gene** - Work with NCBI Gene database to search, retrieve, and analyze gene information including nomenclature, sequences, variations, phenotypes, and pathways using E-utilities and Datasets API
- **Protein Data Bank (PDB)** - Access 3D structural data of proteins, nucleic acids, and biological macromolecules (200K+ structures) with search, retrieval, and analysis capabilities - **Protein Data Bank (PDB)** - Access 3D structural data of proteins, nucleic acids, and biological macromolecules (200K+ structures) with search, retrieval, and analysis capabilities
- **PubChem** - Access chemical compound data from the world's largest free chemical database (110M+ compounds, 270M+ bioactivities) - **PubChem** - Access chemical compound data from the world's largest free chemical database (110M+ compounds, 270M+ bioactivities)
@@ -47,6 +48,7 @@ After installing the plugin, you can use the skill by just mentioning it. Additi
- **Reactome** - Curated pathway database for biological processes and molecular interactions (2,825+ human pathways, 16K+ reactions, 11K+ proteins) with pathway enrichment analysis, expression data analysis, and species comparison using Content Service and Analysis Service APIs - **Reactome** - Curated pathway database for biological processes and molecular interactions (2,825+ human pathways, 16K+ reactions, 11K+ proteins) with pathway enrichment analysis, expression data analysis, and species comparison using Content Service and Analysis Service APIs
- **STRING** - Protein-protein interaction network database (5000+ genomes, 59.3M proteins, 20B+ interactions) with functional enrichment analysis, interaction partner discovery, and network visualization from experimental data, computational prediction, and text-mining - **STRING** - Protein-protein interaction network database (5000+ genomes, 59.3M proteins, 20B+ interactions) with functional enrichment analysis, interaction partner discovery, and network visualization from experimental data, computational prediction, and text-mining
- **UniProt** - Universal Protein Resource for protein sequences, annotations, and functional information (UniProtKB/Swiss-Prot reviewed entries, TrEMBL unreviewed entries) with REST API access for search, retrieval, ID mapping, and batch operations across 200+ databases - **UniProt** - Universal Protein Resource for protein sequences, annotations, and functional information (UniProtKB/Swiss-Prot reviewed entries, TrEMBL unreviewed entries) with REST API access for search, retrieval, ID mapping, and batch operations across 200+ databases
- **USPTO** - United States Patent and Trademark Office data access including patent searches, trademark lookups, patent examination history (PEDS), office actions, assignments, citations, and litigation records; supports PatentSearch API (ElasticSearch-based patent search), TSDR (Trademark Status & Document Retrieval), Patent/Trademark Assignment APIs, and additional specialized APIs for comprehensive IP analysis
- **ZINC** - Free database of commercially-available compounds for virtual screening and drug discovery (230M+ purchasable compounds in ready-to-dock 3D formats) - **ZINC** - Free database of commercially-available compounds for virtual screening and drug discovery (230M+ purchasable compounds in ready-to-dock 3D formats)
### Scientific Packages ### Scientific Packages

View File

@@ -0,0 +1,367 @@
---
name: opentargets-database
description: Access and query the Open Targets Platform, a comprehensive resource for therapeutic target identification and validation. Use this skill when working with drug target discovery, investigating target-disease associations, evaluating target tractability and safety, retrieving evidence from genetics/omics/literature supporting target-disease links, finding known drugs for diseases, assessing druggability of genes, or analyzing gene/disease/drug relationships for drug discovery and development.
---
# Open Targets Database
## Overview
The Open Targets Platform is a comprehensive resource that supports systematic identification and prioritization of potential therapeutic drug targets. It integrates publicly available datasets including human genetics, omics, literature, and chemical data to build and score target-disease associations.
**Key capabilities:**
- Query target (gene) annotations including tractability, safety, expression
- Search for disease-target associations with evidence scores
- Retrieve evidence from multiple data types (genetics, pathways, literature, etc.)
- Find known drugs for diseases and their mechanisms
- Access drug information including clinical trial phases and adverse events
- Evaluate target druggability and therapeutic potential
**Data access:** The platform provides a GraphQL API, web interface, data downloads, and Google BigQuery access. This skill focuses on the GraphQL API for programmatic access.
## When to Use This Skill
Use this skill when:
- **Target discovery:** Finding potential therapeutic targets for a disease
- **Target assessment:** Evaluating tractability, safety, and druggability of genes
- **Evidence gathering:** Retrieving supporting evidence for target-disease associations
- **Drug repurposing:** Identifying existing drugs that could be repurposed for new indications
- **Competitive intelligence:** Understanding clinical precedence and drug development landscape
- **Target prioritization:** Ranking targets based on genetic evidence and other data types
- **Mechanism research:** Investigating biological pathways and gene functions
- **Biomarker discovery:** Finding genes differentially expressed in disease
- **Safety assessment:** Identifying potential toxicity concerns for drug targets
## Core Workflow
### 1. Search for Entities
Start by finding the identifiers for targets, diseases, or drugs of interest.
**For targets (genes):**
```python
from scripts.query_opentargets import search_entities
# Search by gene symbol or name
results = search_entities("BRCA1", entity_types=["target"])
# Returns: [{"id": "ENSG00000012048", "name": "BRCA1", ...}]
```
**For diseases:**
```python
# Search by disease name
results = search_entities("alzheimer", entity_types=["disease"])
# Returns: [{"id": "EFO_0000249", "name": "Alzheimer disease", ...}]
```
**For drugs:**
```python
# Search by drug name
results = search_entities("aspirin", entity_types=["drug"])
# Returns: [{"id": "CHEMBL25", "name": "ASPIRIN", ...}]
```
**Identifiers used:**
- Targets: Ensembl gene IDs (e.g., `ENSG00000157764`)
- Diseases: EFO (Experimental Factor Ontology) IDs (e.g., `EFO_0000249`)
- Drugs: ChEMBL IDs (e.g., `CHEMBL25`)
### 2. Query Target Information
Retrieve comprehensive target annotations to assess druggability and biology.
```python
from scripts.query_opentargets import get_target_info
target_info = get_target_info("ENSG00000157764", include_diseases=True)
# Access key fields:
# - approvedSymbol: HGNC gene symbol
# - approvedName: Full gene name
# - tractability: Druggability assessments across modalities
# - safetyLiabilities: Known safety concerns
# - geneticConstraint: Constraint scores from gnomAD
# - associatedDiseases: Top disease associations with scores
```
**Key annotations to review:**
- **Tractability:** Small molecule, antibody, PROTAC druggability predictions
- **Safety:** Known toxicity concerns from multiple databases
- **Genetic constraint:** pLI and LOEUF scores indicating essentiality
- **Disease associations:** Diseases linked to the target with evidence scores
Refer to `references/target_annotations.md` for detailed information about all target features.
### 3. Query Disease Information
Get disease details and associated targets/drugs.
```python
from scripts.query_opentargets import get_disease_info
disease_info = get_disease_info("EFO_0000249", include_targets=True)
# Access fields:
# - name: Disease name
# - description: Disease description
# - therapeuticAreas: High-level disease categories
# - associatedTargets: Top targets with association scores
```
### 4. Retrieve Target-Disease Evidence
Get detailed evidence supporting a target-disease association.
```python
from scripts.query_opentargets import get_target_disease_evidence
# Get all evidence
evidence = get_target_disease_evidence(
ensembl_id="ENSG00000157764",
efo_id="EFO_0000249"
)
# Filter by evidence type
genetic_evidence = get_target_disease_evidence(
ensembl_id="ENSG00000157764",
efo_id="EFO_0000249",
data_types=["genetic_association"]
)
# Each evidence record contains:
# - datasourceId: Specific data source (e.g., "gwas_catalog", "chembl")
# - datatypeId: Evidence category (e.g., "genetic_association", "known_drug")
# - score: Evidence strength (0-1)
# - studyId: Original study identifier
# - literature: Associated publications
```
**Major evidence types:**
1. **genetic_association:** GWAS, rare variants, ClinVar, gene burden
2. **somatic_mutation:** Cancer Gene Census, IntOGen, cancer biomarkers
3. **known_drug:** Clinical precedence from approved/clinical drugs
4. **affected_pathway:** CRISPR screens, pathway analyses, gene signatures
5. **rna_expression:** Differential expression from Expression Atlas
6. **animal_model:** Mouse phenotypes from IMPC
7. **literature:** Text-mining from Europe PMC
Refer to `references/evidence_types.md` for detailed descriptions of all evidence types and interpretation guidelines.
### 5. Find Known Drugs
Identify drugs used for a disease and their targets.
```python
from scripts.query_opentargets import get_known_drugs_for_disease
drugs = get_known_drugs_for_disease("EFO_0000249")
# drugs contains:
# - uniqueDrugs: Total number of unique drugs
# - uniqueTargets: Total number of unique targets
# - rows: List of drug-target-indication records with:
# - drug: {name, drugType, maximumClinicalTrialPhase}
# - targets: Genes targeted by the drug
# - phase: Clinical trial phase for this indication
# - status: Trial status (active, completed, etc.)
# - mechanismOfAction: How drug works
```
**Clinical phases:**
- Phase 4: Approved drug
- Phase 3: Late-stage clinical trials
- Phase 2: Mid-stage trials
- Phase 1: Early safety trials
### 6. Get Drug Information
Retrieve detailed drug information including mechanisms and indications.
```python
from scripts.query_opentargets import get_drug_info
drug_info = get_drug_info("CHEMBL25")
# Access:
# - name, synonyms: Drug identifiers
# - drugType: Small molecule, antibody, etc.
# - maximumClinicalTrialPhase: Development stage
# - mechanismsOfAction: Target and action type
# - indications: Diseases with trial phases
# - withdrawnNotice: If withdrawn, reasons and countries
```
### 7. Get All Associations for a Target
Find all diseases associated with a target, optionally filtering by score.
```python
from scripts.query_opentargets import get_target_associations
# Get associations with score >= 0.5
associations = get_target_associations(
ensembl_id="ENSG00000157764",
min_score=0.5
)
# Each association contains:
# - disease: {id, name}
# - score: Overall association score (0-1)
# - datatypeScores: Breakdown by evidence type
```
**Association scores:**
- Range: 0-1 (higher = stronger evidence)
- Aggregate evidence across all data types using harmonic sum
- NOT confidence scores but relative ranking metrics
- Under-studied diseases may have lower scores despite good evidence
## GraphQL API Details
**For custom queries beyond the provided helper functions**, use the GraphQL API directly or modify `scripts/query_opentargets.py`.
Key information:
- **Endpoint:** `https://api.platform.opentargets.org/api/v4/graphql`
- **Interactive browser:** `https://api.platform.opentargets.org/api/v4/graphql/browser`
- **No authentication required**
- **Request only needed fields** to minimize response size
- **Use pagination** for large result sets: `page: {size: N, index: M}`
Refer to `references/api_reference.md` for:
- Complete endpoint documentation
- Example queries for all entity types
- Error handling patterns
- Best practices for API usage
## Best Practices
### Target Prioritization Strategy
When prioritizing drug targets:
1. **Start with genetic evidence:** Human genetics (GWAS, rare variants) provides the strongest evidence of disease relevance
2. **Check tractability:** Prefer targets with clinical or discovery precedence
3. **Assess safety:** Review safety liabilities, expression patterns, and genetic constraint
4. **Evaluate clinical precedence:** Known drugs indicate druggability and therapeutic window
5. **Consider multiple evidence types:** Convergent evidence from different sources increases confidence
6. **Validate mechanistically:** Pathway evidence and biological plausibility
7. **Review literature manually:** For critical decisions, examine primary publications
### Evidence Interpretation
**Strong evidence indicators:**
- Multiple independent evidence sources
- High genetic association scores (especially GWAS with L2G > 0.5)
- Clinical precedence from approved drugs
- ClinVar pathogenic variants with disease match
- Mouse models with relevant phenotypes
**Caution flags:**
- Single evidence source only
- Text-mining as sole evidence (requires manual validation)
- Conflicting evidence across sources
- High essentiality + ubiquitous expression (poor therapeutic window)
- Multiple safety liabilities
**Score interpretation:**
- Scores rank relative strength, not absolute confidence
- Under-studied diseases have lower scores despite potentially valid targets
- Weight expert-curated sources higher than computational predictions
- Check evidence breakdown, not just overall score
### Common Workflows
**Workflow 1: Target Discovery for a Disease**
1. Search for disease → get EFO ID
2. Query disease info with `include_targets=True`
3. Review top targets sorted by association score
4. For promising targets, get detailed target info
5. Examine evidence types supporting each association
6. Assess tractability and safety for prioritized targets
**Workflow 2: Target Validation**
1. Search for target → get Ensembl ID
2. Get comprehensive target info
3. Check tractability (especially clinical precedence)
4. Review safety liabilities and genetic constraint
5. Examine disease associations to understand biology
6. Look for chemical probes or tool compounds
7. Check known drugs targeting the gene for mechanism insights
**Workflow 3: Drug Repurposing**
1. Search for disease → get EFO ID
2. Get known drugs for disease
3. For each drug, get detailed drug info
4. Examine mechanisms of action and targets
5. Look for related disease indications
6. Assess clinical trial phases and status
7. Identify repurposing opportunities based on mechanism
**Workflow 4: Competitive Intelligence**
1. Search for target of interest
2. Get associated diseases with evidence
3. For each disease, get known drugs
4. Review clinical phases and development status
5. Identify competitors and their mechanisms
6. Assess clinical precedence and market landscape
## Resources
### Scripts
**scripts/query_opentargets.py**
Helper functions for common API operations:
- `search_entities()` - Search for targets, diseases, or drugs
- `get_target_info()` - Retrieve target annotations
- `get_disease_info()` - Retrieve disease information
- `get_target_disease_evidence()` - Get supporting evidence
- `get_known_drugs_for_disease()` - Find drugs for a disease
- `get_drug_info()` - Retrieve drug details
- `get_target_associations()` - Get all associations for a target
- `execute_query()` - Execute custom GraphQL queries
### References
**references/api_reference.md**
Complete GraphQL API documentation including:
- Endpoint details and authentication
- Available query types (target, disease, drug, search)
- Example queries for all common operations
- Error handling and best practices
- Data licensing and citation requirements
**references/evidence_types.md**
Comprehensive guide to evidence types and data sources:
- Detailed descriptions of all 7 major evidence types
- Scoring methodologies for each source
- Evidence interpretation guidelines
- Strengths and limitations of each evidence type
- Quality assessment recommendations
**references/target_annotations.md**
Complete target annotation reference:
- 12 major annotation categories explained
- Tractability assessment details
- Safety liability sources
- Expression, essentiality, and constraint data
- Interpretation guidelines for target prioritization
- Red flags and green flags for target assessment
## Data Updates and Versioning
The Open Targets Platform is updated **quarterly** with new data releases. The current release (as of October 2025) is available at the API endpoint.
**Release information:** Check https://platform-docs.opentargets.org/release-notes for the latest updates.
**Citation:** When using Open Targets data, cite:
Buniello, A. et al. (2025) Open Targets Platform: facilitating therapeutic hypotheses building in drug discovery. Nucleic Acids Research, 53(D1):D1467-D1477.
## Limitations and Considerations
1. **API is for exploratory queries:** For systematic analyses of many targets/diseases, use data downloads or BigQuery
2. **Scores are relative, not absolute:** Association scores rank evidence strength but don't predict clinical success
3. **Under-studied diseases score lower:** Novel or rare diseases may have strong evidence but lower aggregate scores
4. **Evidence quality varies:** Weight expert-curated sources higher than computational predictions
5. **Requires biological interpretation:** Scores and evidence must be interpreted in biological and clinical context
6. **No authentication required:** All data is freely accessible, but cite appropriately

View File

@@ -0,0 +1,249 @@
# Open Targets Platform API Reference
## API Endpoint
```
https://api.platform.opentargets.org/api/v4/graphql
```
Interactive GraphQL playground with documentation:
```
https://api.platform.opentargets.org/api/v4/graphql/browser
```
## Access Methods
The Open Targets Platform provides multiple access methods:
1. **GraphQL API** - Best for single entity queries and flexible data retrieval
2. **Web Interface** - Interactive platform at https://platform.opentargets.org
3. **Data Downloads** - FTP at https://ftp.ebi.ac.uk/pub/databases/opentargets/platform/
4. **Google BigQuery** - For large-scale systematic queries
## Authentication
No authentication is required for the GraphQL API. All data is freely accessible.
## Rate Limits
For systematic queries involving multiple targets or diseases, use dataset downloads or BigQuery instead of repeated API calls. The API is optimized for single-entity and exploratory queries.
## GraphQL Query Structure
GraphQL queries consist of:
1. Query operation with optional variables
2. Field selection (request only needed fields)
3. Nested entity traversal
### Basic Python Example
```python
import requests
import json
# Define the query
query_string = """
query target($ensemblId: String!){
target(ensemblId: $ensemblId){
id
approvedSymbol
biotype
geneticConstraint {
constraintType
exp
obs
score
}
}
}
"""
# Define variables
variables = {"ensemblId": "ENSG00000169083"}
# Make the request
base_url = "https://api.platform.opentargets.org/api/v4/graphql"
response = requests.post(base_url, json={"query": query_string, "variables": variables})
data = json.loads(response.text)
print(data)
```
## Available Query Endpoints
### /target
Retrieve gene annotations, tractability assessments, and disease associations.
**Common fields:**
- `id` - Ensembl gene ID
- `approvedSymbol` - HGNC gene symbol
- `approvedName` - Full gene name
- `biotype` - Gene type (protein_coding, etc.)
- `tractability` - Druggability assessment
- `safetyLiabilities` - Safety information
- `expressions` - Baseline expression data
- `knownDrugs` - Approved/clinical drugs
- `associatedDiseases` - Disease associations with evidence
### /disease
Retrieve disease/phenotype data, known drugs, and clinical information.
**Common fields:**
- `id` - EFO disease identifier
- `name` - Disease name
- `description` - Disease description
- `therapeuticAreas` - High-level disease categories
- `synonyms` - Alternative names
- `knownDrugs` - Drugs indicated for disease
- `associatedTargets` - Target associations with evidence
### /drug
Retrieve compound details, mechanisms of action, and pharmacovigilance data.
**Common fields:**
- `id` - ChEMBL identifier
- `name` - Drug name
- `drugType` - Small molecule, antibody, etc.
- `maximumClinicalTrialPhase` - Development stage
- `indications` - Disease indications
- `mechanismsOfAction` - Target mechanisms
- `adverseEvents` - Pharmacovigilance data
### /search
Search across all entities (targets, diseases, drugs).
**Parameters:**
- `queryString` - Search term
- `entityNames` - Filter by entity type(s)
- `page` - Pagination
### /associationDiseaseIndirect
Retrieve target-disease associations including indirect evidence from disease descendants in ontology.
**Key fields:**
- `rows` - Association records with scores
- `aggregations` - Aggregated statistics
## Example Queries
### Query 1: Get target information with disease associations
```python
query = """
query targetInfo($ensemblId: String!) {
target(ensemblId: $ensemblId) {
approvedSymbol
approvedName
tractability {
label
modality
value
}
associatedDiseases(page: {size: 10}) {
rows {
disease {
name
}
score
datatypeScores {
componentId
score
}
}
}
}
}
"""
variables = {"ensemblId": "ENSG00000157764"}
```
### Query 2: Search for diseases
```python
query = """
query searchDiseases($queryString: String!) {
search(queryString: $queryString, entityNames: ["disease"]) {
hits {
id
entity
name
description
}
}
}
"""
variables = {"queryString": "alzheimer"}
```
### Query 3: Get evidence for target-disease pair
```python
query = """
query evidences($ensemblId: String!, $efoId: String!) {
disease(efoId: $efoId) {
evidences(ensemblIds: [$ensemblId], size: 100) {
rows {
datasourceId
datatypeId
score
studyId
literature
}
}
}
}
"""
variables = {"ensemblId": "ENSG00000157764", "efoId": "EFO_0000249"}
```
### Query 4: Get known drugs for a disease
```python
query = """
query knownDrugs($efoId: String!) {
disease(efoId: $efoId) {
knownDrugs {
uniqueDrugs
rows {
drug {
name
id
}
targets {
approvedSymbol
}
phase
status
}
}
}
}
"""
variables = {"efoId": "EFO_0000249"}
```
## Error Handling
GraphQL APIs can return HTTP status code 200 even when a query fails, so do not rely on the status code alone. Check the response body for an `errors` key:
```python
if 'errors' in response_data:
    print(f"GraphQL errors: {response_data['errors']}")
else:
    print(f"Data: {response_data['data']}")
```
## Best Practices
1. **Request only needed fields** - Minimize data transfer and improve response time
2. **Use variables** - Make queries reusable and safer
3. **Handle pagination** - Most list fields support pagination with `page: {size: N, index: M}`
4. **Explore the schema** - Use the GraphQL browser to discover available fields
5. **Batch related queries** - Combine multiple entity fetches in a single query when possible
6. **Cache results** - Store frequently accessed data locally to reduce API calls
7. **Use BigQuery for bulk** - Switch to BigQuery/downloads for systematic analyses
## Data Licensing
All Open Targets Platform data is freely available. When using the data in research or commercial products, cite the latest publication:
Buniello, A. et al. (2025) Open Targets Platform: facilitating therapeutic hypotheses building in drug discovery. Nucleic Acids Research, 53(D1):D1467-D1477.

View File

@@ -0,0 +1,306 @@
# Evidence Types and Data Sources
## Overview
Evidence represents any event or set of events that identifies a target as a potential causal gene or protein for a disease. Evidence is standardized and mapped to:
- **Ensembl gene IDs** for targets
- **EFO (Experimental Factor Ontology)** for diseases/phenotypes
Evidence is organized into **data types** (broader categories) and **data sources** (specific databases/studies).
## Evidence Data Types
### 1. Genetic Association
Evidence from human genetics linking genetic variants to disease phenotypes.
#### Data Sources:
**GWAS (Genome-Wide Association Studies)**
- Population-level common variant associations
- Filtered with Locus-to-Gene (L2G) scores >0.05
- Includes fine-mapping and colocalization data
- Sources: GWAS Catalog, FinnGen, UK Biobank, EBI GWAS
**Gene Burden Tests**
- Rare variant association analyses
- Aggregate effects of multiple rare variants in a gene
- Particularly relevant for Mendelian and rare diseases
**ClinVar Germline**
- Clinical variant interpretations
- Classifications: pathogenic, likely pathogenic, VUS, benign
- Expert-reviewed variant-disease associations
**Genomics England PanelApp**
- Expert gene-disease ratings
- Green (confirmed), amber (probable), red (no evidence)
- Focus on rare diseases and cancer
**Gene2Phenotype**
- Curated gene-disease relationships
- Allelic requirements and inheritance patterns
- Clinical validity assessments
**UniProt Literature & Variants**
- Literature-based gene-disease associations
- Expert-curated from scientific publications
**Orphanet**
- Rare disease gene associations
- Expert-reviewed and maintained
**ClinGen**
- Clinical genome resource classifications
- Gene-disease validity assertions
### 2. Somatic Mutations
Evidence from cancer genomics identifying driver genes and therapeutic targets.
#### Data Sources:
**Cancer Gene Census**
- Expert-curated cancer genes
- Tier classifications (1 = strong evidence, 2 = emerging)
- Mutation types and cancer types
**IntOGen**
- Computational driver gene predictions
- Aggregated from large cohort studies
- Statistical significance of mutations
**ClinVar Somatic**
- Somatic clinical variant interpretations
- Oncogenic/likely oncogenic classifications
**Cancer Biomarkers**
- FDA/EMA approved biomarkers
- Clinical trial biomarkers
- Prognostic and predictive markers
### 3. Known Drugs
Evidence from clinical precedence showing drugs targeting genes for disease indications.
#### Data Source:
**ChEMBL**
- Approved drugs (Phase 4)
- Clinical candidates (Phase 1-3)
- Withdrawn drugs
- Drug-target-indication triplets with mechanism of action
**Clinical Trial Information:**
- `phase`: Maximum clinical trial phase (1, 2, 3, 4)
- `status`: Active, terminated, completed, withdrawn
- `mechanismOfAction`: How drug affects target
### 4. Affected Pathways
Evidence linking genes to disease through pathway perturbations and functional screens.
#### Data Sources:
**CRISPR Screens**
- Genome-scale knockout screens
- Cancer dependency and essentiality data
**Project Score (Cancer Dependency Map)**
- CRISPR-Cas9 fitness screens across cancer cell lines
- Gene essentiality profiles
**SLAPenrich**
- Pathway enrichment analysis
- Somatic mutation pathway impacts
**PROGENy**
- Pathway activity inference
- Signaling pathway perturbations
**Reactome**
- Expert-curated pathway annotations
- Biological pathway representations
**Gene Signatures**
- Expression-based signatures
- Pathway activity patterns
### 5. RNA Expression
Evidence from differential gene expression in disease vs. control tissues.
#### Data Source:
**Expression Atlas**
- Differential expression data
- Baseline expression across tissues/conditions
- RNA-Seq and microarray studies
- Log2 fold-change and p-values
### 6. Animal Models
Evidence from in vivo studies showing phenotypes associated with gene perturbations.
#### Data Source:
**IMPC (International Mouse Phenotyping Consortium)**
- Systematic mouse knockout phenotypes
- Phenotype-disease mappings via ontologies
- Standardized phenotyping procedures
### 7. Literature
Evidence from text-mining of biomedical literature.
#### Data Source:
**Europe PMC**
- Co-occurrence of genes and diseases in abstracts
- Normalized citation counts
- Weighted by publication type and recency
## Evidence Scoring
Each evidence source has its own scoring methodology:
### Score Ranges
- Most scores normalized to 0-1 range
- Higher scores indicate stronger evidence
- Scores are NOT confidence levels but relative strength indicators
### Common Scoring Approaches:
**Categorical Classifications:**
- ClinVar: Pathogenic (1.0), Likely pathogenic (0.99), etc.
- Gene2Phenotype: Confirmed/probable ratings
- PanelApp: Green/amber/red classifications
**Statistical Measures:**
- GWAS: L2G scores incorporating multiple lines of evidence
- Gene Burden: Statistical significance of variant aggregation
- Expression: Adjusted p-values and fold-changes
**Clinical Precedence:**
- Known Drugs: Phase weights (Phase 4 = 1.0, Phase 3 = 0.8, etc.)
- Clinical status modifiers
**Computational Predictions:**
- IntOGen: Q-values from driver mutation analysis
- PROGENy/SLAPenrich: Pathway activity/enrichment scores
## Evidence Interpretation Guidelines
### Strengths by Data Type
**Genetic Association** - Strongest human genetic evidence
- Direct link between genetic variation and disease
- Mendelian diseases: high confidence
- GWAS: requires L2G to identify causal gene
- Consider ancestry and population-specific effects
**Somatic Mutations** - Direct evidence in cancer
- Strong for oncology indications
- Driver mutations indicate therapeutic potential
- Consider cancer type specificity
**Known Drugs** - Clinical validation
- Highest confidence: approved drugs (Phase 4)
- Consider mechanism relevance to new indication
- Phase 1-2: early evidence, higher risk
**Affected Pathways** - Mechanistic insights
- Supports biological plausibility
- May not predict clinical success
- Useful for hypothesis generation
**RNA Expression** - Observational evidence
- Correlation, not causation
- May reflect disease consequence vs. cause
- Useful for biomarker identification
**Animal Models** - Translational evidence
- Strong for understanding biology
- Variable translation to human disease
- Most useful when phenotype matches human disease
**Literature** - Exploratory signal
- Text-mining captures research focus
- May reflect publication bias
- Requires manual literature review for validation
### Important Considerations
1. **Multiple evidence types strengthen confidence** - Convergent evidence from different data types provides stronger support
2. **Under-studied diseases score lower** - Novel or rare diseases may have strong evidence but lower aggregate scores due to limited research
3. **Association scores are not probabilities** - Scores rank relative evidence strength, not success probability
4. **Context matters** - Evidence strength depends on:
- Disease mechanism understanding
- Target biology and druggability
- Clinical precedence in related indications
- Safety considerations
5. **Data source reliability varies** - Weight expert-curated sources (ClinGen, Gene2Phenotype) higher than computational predictions
## Using Evidence in Queries
### Filtering by Data Type
```python
query = """
query evidenceByType($ensemblId: String!, $efoId: String!, $dataTypes: [String!]) {
disease(efoId: $efoId) {
evidences(ensemblIds: [$ensemblId], datatypes: $dataTypes) {
rows {
datasourceId
score
}
}
}
}
"""
variables = {
"ensemblId": "ENSG00000157764",
"efoId": "EFO_0000249",
"dataTypes": ["genetic_association", "somatic_mutation"]
}
```
### Accessing Data Type Scores
Data type scores aggregate all source scores within that type:
```python
query = """
query associationScores($ensemblId: String!, $efoId: String!) {
target(ensemblId: $ensemblId) {
associatedDiseases(efoIds: [$efoId]) {
rows {
disease {
name
}
score
datatypeScores {
componentId
score
}
}
}
}
}
"""
```
## Evidence Quality Assessment
When evaluating evidence:
1. **Check multiple sources** - Single source may be unreliable
2. **Prioritize human genetic evidence** - Strongest disease relevance
3. **Consider clinical precedence** - Known drugs indicate druggability
4. **Assess mechanistic support** - Pathway evidence supports biology
5. **Review literature manually** - For critical decisions, read primary publications
6. **Validate in primary databases** - Cross-reference with ClinVar, ClinGen, etc.

View File

@@ -0,0 +1,401 @@
# Target Annotations and Features
## Overview
Open Targets defines a target as "any naturally-occurring molecule that can be targeted by a medicinal product." Targets are primarily protein-coding genes identified by Ensembl gene IDs, but also include RNAs and pseudogenes from canonical chromosomes.
## Core Target Annotations
### 1. Tractability Assessment
Tractability evaluates the druggability potential of a target across different modalities.
#### Modalities Assessed:
**Small Molecule**
- Prediction of small molecule druggability
- Based on structural features, chemical precedence
- Buckets: Clinical precedence, Discovery precedence, Predicted tractable
**Antibody**
- Likelihood of antibody-based therapeutic success
- Cell surface/secreted protein location
- Precedence categories similar to small molecules
**PROTAC (Protein Degradation)**
- Assessment for targeted protein degradation
- E3 ligase compatibility
- Emerging modality category
**Other Modalities**
- Gene therapy, RNA-based therapeutics
- Oligonucleotide approaches
#### Tractability Levels:
1. **Clinical Precedence** - Target of approved/clinical drug with similar mechanism
2. **Discovery Precedence** - Target of tool compounds or compounds in preclinical development
3. **Predicted Tractable** - Computational predictions suggest druggability
4. **Unknown** - Insufficient data to assess
### 2. Safety Liabilities
Safety information aggregated from multiple sources to identify potential toxicity concerns.
#### Data Sources:
**ToxCast**
- High-throughput toxicology screening data
- In vitro assay results
- Toxicity pathway activation
**AOPWiki (Adverse Outcome Pathways)**
- Mechanistic pathways from molecular initiating event to adverse outcome
- Systems toxicology frameworks
**PharmGKB**
- Pharmacogenomic relationships
- Genetic variants affecting drug response and toxicity
**Published Literature**
- Expert-curated safety concerns from publications
- Clinical trial adverse events
#### Safety Flags:
- **Organ toxicity** - Liver, kidney, cardiac effects
- **Target safety liability** - Known on-target toxic effects
- **Off-target effects** - Unintended activity concerns
- **Clinical observations** - Adverse events from drugs targeting gene
### 3. Baseline Expression
Gene/protein expression across tissues and cell types from multiple sources.
#### Data Sources:
**Expression Atlas**
- RNA-Seq expression across tissues/conditions
- Normalized expression levels (TPM, FPKM)
- Differential expression studies
**GTEx (Genotype-Tissue Expression)**
- Comprehensive tissue expression from healthy donors
- Median TPM across 53 tissues
- Expression variation analysis
**Human Protein Atlas**
- Protein expression via immunohistochemistry
- Subcellular localization
- Tissue specificity classifications
#### Expression Metrics:
- **TPM (Transcripts Per Million)** - Normalized RNA abundance
- **Tissue specificity** - Enrichment in specific tissues
- **Protein level** - Correlation with RNA expression
- **Subcellular location** - Where protein is found in cell
### 4. Molecular Interactions
Protein-protein interactions, complex memberships, and molecular partnerships.
#### Interaction Types:
**Physical Interactions**
- Direct protein-protein binding
- Complex components
- Sources: IntAct, BioGRID, STRING
**Pathway Membership**
- Biological pathways from Reactome
- Functional relationships
- Upstream/downstream regulators
**Target Interactors**
- Direct interactors relevant to disease associations
- Context-specific interactions
### 5. Gene Essentiality
Dependency data indicating whether a gene is essential for cell survival.
#### Data Sources:
**Project Score**
- CRISPR-Cas9 fitness screens
- 300+ cancer cell lines
- Scaled essentiality scores (0-1)
**DepMap Portal**
- Large-scale cancer dependency data
- Genetic and pharmacological perturbations
- Common essential genes identification
#### Essentiality Metrics:
- **Score range**: 0 (non-essential) to 1 (essential)
- **Context**: Cell line specific vs. pan-essential
- **Therapeutic window**: Selectivity between disease and normal cells
### 6. Chemical Probes and Tool Compounds
High-quality small molecules for target validation.
#### Sources:
**Probes & Drugs Portal**
- Chemical probes with characterized selectivity
- Quality ratings and annotations
- Target engagement data
**Structural Genomics Consortium (SGC)**
- Target Enabling Packages (TEPs)
- Comprehensive target reagents
- Freely available to academia
**Probe Criteria:**
- Potency (typically IC50 < 100 nM)
- Selectivity (>30-fold vs. off-targets)
- Cell activity demonstrated
- Negative control available
### 7. Pharmacogenetics
Genetic variants affecting drug response for drugs targeting the gene.
#### Data Source: ClinPGx
**Information Included:**
- Variant-drug pairs
- Clinical annotations (dosing, efficacy, toxicity)
- Evidence level and sources
- PharmGKB cross-references
**Clinical Utility:**
- Dosing adjustments based on genotype
- Contraindications for specific variants
- Efficacy predictors
### 8. Genetic Constraint
Measures of negative selection against variants in the gene.
#### Data Source: gnomAD
**Metrics:**
**pLI (probability of Loss-of-function Intolerance)**
- Range: 0-1
- pLI > 0.9 indicates intolerant to LoF variants
- High pLI suggests essentiality
**LOEUF (Loss-of-function Observed/Expected Upper bound Fraction)**
- Lower values indicate greater constraint
- More interpretable than pLI across the full range of values
**Missense Constraint**
- Z-scores for missense depletion
- O/E ratios for missense variants
**Interpretation:**
- High constraint suggests important biological function
- May indicate safety concerns if inhibited
- Essential genes often show high constraint
### 9. Comparative Genomics
Cross-species gene conservation and ortholog information.
#### Data Source: Ensembl Compara
**Ortholog Data:**
- Mouse, rat, zebrafish, other model organisms
- Orthology confidence (1:1, 1:many, many:many)
- Percent identity and similarity
**Utility:**
- Transferability of model organism studies to humans
- Functional conservation assessment
- Evolution and selective pressure
### 10. Cancer Annotations
Cancer-specific target features for oncology indications.
#### Data Sources:
**Cancer Gene Census**
- Role in cancer (oncogene, TSG, fusion)
- Tier classification (1 = established, 2 = emerging)
- Tumor types and mutation types
**Cancer Hallmarks**
- Functional roles in cancer biology
- Hallmarks: proliferation, apoptosis evasion, metastasis, etc.
- Links to specific cancer processes
**Oncology Clinical Trials**
- Drugs in development targeting gene for cancer
- Trial phases and indications
### 11. Mouse Phenotypes
Phenotypes from mouse knockout/mutation studies.
#### Data Source: MGI (Mouse Genome Informatics)
**Phenotype Data:**
- Knockout phenotypes
- Disease model associations
- Mammalian Phenotype Ontology (MP) terms
**Utility:**
- Predict on-target effects
- Safety liability identification
- Mechanism of action insights
### 12. Pathways
Biological pathway annotations placing target in functional context.
#### Data Source: Reactome
**Pathway Information:**
- Curated biological pathways
- Hierarchical organization
- Pathway diagrams with target position
**Applications:**
- Mechanism hypothesis generation
- Related target identification
- Systems biology analysis
## Using Target Annotations in Queries
### Query Template: Comprehensive Target Profile
```python
query = """
query targetProfile($ensemblId: String!) {
target(ensemblId: $ensemblId) {
id
approvedSymbol
approvedName
biotype
# Tractability
tractability {
label
modality
value
}
# Safety
safetyLiabilities {
event
effects {
dosing
organsAffected
}
}
# Expression
expressions {
tissue {
label
}
rna {
value
level
}
protein {
level
}
}
# Chemical probes
chemicalProbes {
id
probeminer
origin
}
# Known drugs
knownDrugs {
uniqueDrugs
rows {
drug {
name
maximumClinicalTrialPhase
}
phase
status
}
}
# Genetic constraint
geneticConstraint {
constraintType
score
exp
obs
}
# Pathways
pathways {
pathway
pathwayId
}
}
}
"""
variables = {"ensemblId": "ENSG00000157764"}
```
## Annotation Interpretation Guidelines
### For Target Prioritization:
1. **Druggability (Tractability):**
- Clinical precedence >> Discovery precedence > Predicted
- Consider modality relevant to therapeutic approach
- Check for existing tool compounds
2. **Safety Assessment:**
- Review organ toxicity signals
- Check expression in critical tissues
- Assess genetic constraint (high = safety concern if inhibited)
- Evaluate clinical adverse events from drugs
3. **Disease Relevance:**
- Combine with association scores
- Check expression in disease-relevant tissues
- Review pathway context
4. **Validation Readiness:**
- Chemical probes available?
- Model organism data supportive?
- Known drugs provide mechanism insight?
5. **Clinical Path Considerations:**
- Pharmacogenetic factors
- Expression pattern (tissue-specific is better for selectivity)
- Essentiality (non-essential better for safety)
### Red Flags:
- **High essentiality + ubiquitous expression** - Poor therapeutic window
- **Multiple safety liabilities** - Toxicity concerns
- **High genetic constraint (pLI > 0.9)** - Critical gene, inhibition may be harmful
- **No tractability precedence** - Higher risk, longer development
- **Conflicting evidence** - Requires deeper investigation
### Green Flags:
- **Clinical precedence + related indication** - De-risked mechanism
- **Tissue-specific expression** - Better selectivity
- **Chemical probes available** - Faster validation
- **Low essentiality + disease relevance** - Good therapeutic window
- **Multiple evidence types converge** - Higher confidence

View File

@@ -0,0 +1,403 @@
#!/usr/bin/env python3
"""
Open Targets Platform GraphQL Query Helper
This script provides reusable functions for querying the Open Targets Platform
GraphQL API. Use these functions to retrieve target, disease, drug, and
association data.
Dependencies: requests (pip install requests)
"""
import requests
import json
from typing import Dict, List, Optional, Any
# API endpoint
BASE_URL = "https://api.platform.opentargets.org/api/v4/graphql"
def execute_query(query: str, variables: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Execute a GraphQL query against the Open Targets Platform API.

    Args:
        query: GraphQL query string.
        variables: Optional dictionary of variables for the query.

    Returns:
        The contents of the response's ``data`` member. An empty dictionary
        is returned when that member is missing or ``null``.

    Raises:
        Exception: If the HTTP request fails (connection problem, timeout,
            4xx/5xx status, body that is not valid JSON) or if the response
            contains a GraphQL ``errors`` member. For transport failures the
            underlying exception is preserved as ``__cause__``.
    """
    payload: Dict[str, Any] = {"query": query}
    if variables:
        payload["variables"] = variables

    # Keep the try body limited to the calls that can fail at the transport
    # level, so GraphQL-level errors are reported separately below.
    try:
        response = requests.post(BASE_URL, json=payload, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error does not derive from RequestException.
        raise Exception(f"API request failed: {e}") from e

    if "errors" in data:
        raise Exception(f"GraphQL errors: {data['errors']}")

    # `data` may be present but null; `.get("data", {})` would return None then.
    return data.get("data") or {}
def search_entities(query_string: str, entity_types: Optional[List[str]] = None) -> List[Dict[str, Any]]:
    """Search for targets, diseases, or drugs by name or identifier.

    Only the first page of results (at most 10 hits) is requested.

    Args:
        query_string: Search term (e.g., "BRCA1", "alzheimer", "aspirin").
        entity_types: Optional list restricting the entity types searched,
            e.g. ["target", "disease", "drug"]. When omitted or empty, no
            entity filter is sent.

    Returns:
        List of hits, each with ``id``, ``entity``, ``name`` and
        ``description``. Empty when nothing matches.

    Raises:
        Exception: Propagated from ``execute_query`` on request or GraphQL
            errors.
    """
    # An explicit page index is sent together with the size.
    # NOTE(review): the Pagination input is believed to require both fields;
    # confirm against the live schema.
    query = """
    query search($queryString: String!, $entityNames: [String!]) {
      search(queryString: $queryString, entityNames: $entityNames, page: {index: 0, size: 10}) {
        hits {
          id
          entity
          name
          description
        }
      }
    }
    """
    variables: Dict[str, Any] = {"queryString": query_string}
    if entity_types:
        variables["entityNames"] = entity_types

    result = execute_query(query, variables)
    # `or` (rather than a .get default) also covers keys present with null.
    search = result.get("search") or {}
    return search.get("hits") or []
def get_target_info(ensembl_id: str, include_diseases: bool = False) -> Dict[str, Any]:
    """Retrieve annotation data for a target gene.

    Args:
        ensembl_id: Ensembl gene ID (e.g., "ENSG00000157764").
        include_diseases: Whether to also request the first page (10 rows)
            of associated diseases with their overall and per-data-type
            scores.

    Returns:
        Dictionary with the target's identifiers, biotype, function
        descriptions, tractability, safety liabilities and genetic
        constraint (plus ``associatedDiseases`` when requested). An empty
        dictionary is returned when the API reports no such target.

    Raises:
        Exception: Propagated from ``execute_query`` on request or GraphQL
            errors.
    """
    if include_diseases:
        # An explicit page index is sent together with the size.
        # NOTE(review): the Pagination input is believed to require both
        # fields; confirm against the live schema.
        disease_fragment = """
        associatedDiseases(page: {index: 0, size: 10}) {
          rows {
            disease {
              id
              name
            }
            score
            datatypeScores {
              componentId
              score
            }
          }
        }
        """
    else:
        disease_fragment = ""

    # Plain concatenation avoids doubling every GraphQL brace, which an
    # f-string template would require.
    # NOTE(review): the nested safetyLiabilities selections have not been
    # validated against the live schema.
    query = (
        """
    query targetInfo($ensemblId: String!) {
      target(ensemblId: $ensemblId) {
        id
        approvedSymbol
        approvedName
        biotype
        functionDescriptions
        tractability {
          label
          modality
          value
        }
        safetyLiabilities {
          event
          effects {
            dosing
            organsAffected
          }
          biosamples {
            tissue {
              label
            }
          }
        }
        geneticConstraint {
          constraintType
          score
          exp
          obs
        }
        """
        + disease_fragment
        + """
      }
    }
    """
    )

    result = execute_query(query, {"ensemblId": ensembl_id})
    # A null `target` must not leak out as None: callers use dict methods.
    return result.get("target") or {}
def get_disease_info(efo_id: str, include_targets: bool = False) -> Dict[str, Any]:
    """Retrieve information about a disease.

    Args:
        efo_id: EFO disease identifier (e.g., "EFO_0000249").
        include_targets: Whether to also request the first page (10 rows)
            of associated targets with their overall and per-data-type
            scores.

    Returns:
        Dictionary with the disease's id, name, description, therapeutic
        areas and synonyms (plus ``associatedTargets`` when requested). An
        empty dictionary is returned when the API reports no such disease.

    Raises:
        Exception: Propagated from ``execute_query`` on request or GraphQL
            errors.
    """
    if include_targets:
        # An explicit page index is sent together with the size.
        # NOTE(review): the Pagination input is believed to require both
        # fields; confirm against the live schema.
        target_fragment = """
        associatedTargets(page: {index: 0, size: 10}) {
          rows {
            target {
              id
              approvedSymbol
              approvedName
            }
            score
            datatypeScores {
              componentId
              score
            }
          }
        }
        """
    else:
        target_fragment = ""

    # Plain concatenation avoids doubling every GraphQL brace, which an
    # f-string template would require.
    query = (
        """
    query diseaseInfo($efoId: String!) {
      disease(efoId: $efoId) {
        id
        name
        description
        therapeuticAreas {
          id
          name
        }
        synonyms {
          terms
        }
        """
        + target_fragment
        + """
      }
    }
    """
    )

    result = execute_query(query, {"efoId": efo_id})
    # A null `disease` must not leak out as None: callers use dict methods.
    return result.get("disease") or {}
def get_target_disease_evidence(ensembl_id: str, efo_id: str,
                                data_types: Optional[List[str]] = None) -> List[Dict[str, Any]]:
    """Retrieve evidence linking a target to a disease.

    At most 100 evidence rows are requested.

    Args:
        ensembl_id: Ensembl gene ID.
        efo_id: EFO disease identifier.
        data_types: Optional filter for evidence types (e.g.,
            ["genetic_association", "known_drug"]). When omitted or empty,
            no filter variable is sent.

    Returns:
        List of evidence records with data source, data type, score and
        supporting references. Empty when the disease is unknown or no
        evidence is returned.

    Raises:
        Exception: Propagated from ``execute_query`` on request or GraphQL
            errors.
    """
    # NOTE(review): confirm that `evidences` accepts a `datatypes` argument in
    # the live schema; it has not been validated here.
    query = """
    query evidences($ensemblId: String!, $efoId: String!, $dataTypes: [String!]) {
      disease(efoId: $efoId) {
        evidences(ensemblIds: [$ensemblId], datatypes: $dataTypes, size: 100) {
          rows {
            datasourceId
            datatypeId
            score
            targetFromSourceId
            studyId
            literature
            cohortPhenotypes
          }
        }
      }
    }
    """
    variables: Dict[str, Any] = {"ensemblId": ensembl_id, "efoId": efo_id}
    if data_types:
        variables["dataTypes"] = data_types

    result = execute_query(query, variables)
    # Each level may be present but null (e.g. unknown disease), so a .get
    # default is not enough; fall back explicitly at every step.
    disease = result.get("disease") or {}
    evidences = disease.get("evidences") or {}
    return evidences.get("rows") or []
def get_known_drugs_for_disease(efo_id: str) -> Dict[str, Any]:
    """Get drugs known to be used for a disease.

    Args:
        efo_id: EFO disease identifier.

    Returns:
        Dictionary with ``uniqueDrugs``, ``uniqueTargets`` and ``rows``
        (drug, targets, phase, status, mechanism of action). An empty
        dictionary is returned when the disease is unknown or has no
        known-drug data.

    Raises:
        Exception: Propagated from ``execute_query`` on request or GraphQL
            errors.
    """
    # NOTE(review): the row-level field names (notably `targets`) have not
    # been validated against the live schema.
    query = """
    query knownDrugs($efoId: String!) {
      disease(efoId: $efoId) {
        knownDrugs {
          uniqueDrugs
          uniqueTargets
          rows {
            drug {
              id
              name
              drugType
              maximumClinicalTrialPhase
            }
            targets {
              id
              approvedSymbol
            }
            phase
            status
            mechanismOfAction
          }
        }
      }
    }
    """
    result = execute_query(query, {"efoId": efo_id})
    # `disease` and `knownDrugs` may each be null; guard both so callers
    # always receive a dictionary.
    disease = result.get("disease") or {}
    return disease.get("knownDrugs") or {}
def get_drug_info(chembl_id: str) -> Dict[str, Any]:
    """Retrieve information about a drug.

    Args:
        chembl_id: ChEMBL identifier (e.g., "CHEMBL25").

    Returns:
        Dictionary with the drug's name, synonyms, type, maximum clinical
        trial phase, withdrawal details, mechanisms of action and
        indications. An empty dictionary is returned when the API reports
        no such drug.

    Raises:
        Exception: Propagated from ``execute_query`` on request or GraphQL
            errors.
    """
    # NOTE(review): the selections under withdrawnNotice, mechanismsOfAction
    # and indications have not been validated against the live schema.
    query = """
    query drugInfo($chemblId: String!) {
      drug(chemblId: $chemblId) {
        id
        name
        synonyms
        drugType
        maximumClinicalTrialPhase
        hasBeenWithdrawn
        withdrawnNotice {
          reasons
          countries
        }
        mechanismsOfAction {
          actionType
          mechanismOfAction
          targetName
          targets {
            id
            approvedSymbol
          }
        }
        indications {
          disease
          efoId
          maxPhaseForIndication
        }
      }
    }
    """
    result = execute_query(query, {"chemblId": chembl_id})
    # A null `drug` must not leak out as None: callers use dict methods.
    return result.get("drug") or {}
def get_target_associations(ensembl_id: str, min_score: float = 0.0) -> List[Dict[str, Any]]:
    """Get disease associations for a target, filtered by minimum score.

    Only the first page of associations (at most 100 rows, in the API's
    default order) is fetched; the score filter is applied client-side to
    that page.

    Args:
        ensembl_id: Ensembl gene ID.
        min_score: Minimum association score (0-1) to include.

    Returns:
        List of disease associations (disease id/name, overall score and
        per-data-type scores) whose score is at least ``min_score``. Empty
        when the target is unknown or nothing passes the filter.

    Raises:
        Exception: Propagated from ``execute_query`` on request or GraphQL
            errors.
    """
    # An explicit page index is sent together with the size.
    # NOTE(review): the Pagination input is believed to require both fields;
    # confirm against the live schema.
    query = """
    query targetAssociations($ensemblId: String!) {
      target(ensemblId: $ensemblId) {
        associatedDiseases(page: {index: 0, size: 100}) {
          count
          rows {
            disease {
              id
              name
            }
            score
            datatypeScores {
              componentId
              score
            }
          }
        }
      }
    }
    """
    result = execute_query(query, {"ensemblId": ensembl_id})
    # Each level may be present but null (e.g. unknown target).
    target = result.get("target") or {}
    associated = target.get("associatedDiseases") or {}
    associations = associated.get("rows") or []

    # Treat a missing or null score as 0 so the comparison cannot fail.
    return [assoc for assoc in associations if (assoc.get("score") or 0) >= min_score]
# Example usage: running this module as a script exercises the helpers above
# against the live API and prints a short summary.
def _demo_target_lookup() -> None:
    """Search for BRCA1, then print details of the first hit."""
    # Example 1: Search for a gene
    print("Searching for BRCA1...")
    hits = search_entities("BRCA1", entity_types=["target"])
    for hit in hits[:3]:
        print(f" {hit['name']} ({hit['id']})")

    # Example 2: Get target information (only when the search found something)
    if not hits:
        return
    first_id = hits[0]['id']
    print(f"\nGetting info for {first_id}...")
    info = get_target_info(first_id, include_diseases=True)
    print(f" Symbol: {info.get('approvedSymbol')}")
    print(f" Name: {info.get('approvedName')}")

    # Show top diseases
    rows = info.get('associatedDiseases', {}).get('rows', [])
    if not rows:
        return
    print("\n Top associated diseases:")
    for row in rows[:3]:
        print(f" - {row['disease']['name']} (score: {row['score']:.2f})")


def _demo_disease_lookup() -> None:
    """Search for Alzheimer's disease, then list known drugs for the first hit."""
    # Example 3: Search for a disease
    print("\n\nSearching for Alzheimer's disease...")
    matches = search_entities("alzheimer", entity_types=["disease"])
    if not matches:
        return
    top = matches[0]
    disease_id = top['id']
    print(f" Found: {top['name']} ({disease_id})")

    # Get known drugs
    print(f"\n Known drugs for {top['name']}:")
    known = get_known_drugs_for_disease(disease_id)
    for entry in known.get('rows', [])[:5]:
        print(f" - {entry['drug']['name']} (Phase {entry['phase']})")


if __name__ == "__main__":
    _demo_target_lookup()
    _demo_disease_lookup()

View File

@@ -0,0 +1,597 @@
---
name: uspto-database
description: Access and analyze United States Patent and Trademark Office (USPTO) data including patent searches, trademark lookups, patent examination history, office actions, assignments, citations, and litigation records. Use this skill when working with US patents or trademarks, analyzing patent prosecution, tracking IP ownership, conducting prior art searches, monitoring trademark status, retrieving patent examination data from PEDS, searching patent citations, or analyzing patent portfolios. Supports PatentSearch API (ElasticSearch-based patent search), PEDS (Patent Examination Data System), TSDR (Trademark Status & Document Retrieval), Patent/Trademark Assignment APIs, Office Action APIs, PTAB proceedings, and patent litigation data.
---
# USPTO Database
## Overview
Access comprehensive United States Patent and Trademark Office data through multiple specialized APIs. This skill enables patent and trademark searching, retrieval of examination history, analysis of citations and office actions, tracking of assignments and ownership, and access to litigation records.
## When to Use This Skill
Use this skill for tasks involving:
- **Patent Search**: Finding patents by keywords, inventors, assignees, classifications, or dates
- **Patent Details**: Retrieving full patent data including claims, abstracts, citations
- **Trademark Search**: Looking up trademarks by serial or registration number
- **Trademark Status**: Checking trademark status, ownership, and prosecution history
- **Examination History**: Accessing patent prosecution data from PEDS (Patent Examination Data System)
- **Office Actions**: Retrieving office action text, citations, and rejections
- **Assignments**: Tracking patent/trademark ownership transfers
- **Citations**: Analyzing patent citations (forward and backward)
- **Litigation**: Accessing patent litigation records
- **Portfolio Analysis**: Analyzing patent/trademark portfolios for companies or inventors
## USPTO API Ecosystem
The USPTO provides multiple specialized APIs for different data needs:
### Core APIs
1. **PatentSearch API** - Modern ElasticSearch-based patent search (replaced legacy PatentsView in May 2025)
- Search patents by keywords, inventors, assignees, classifications, dates
- Access to patent data through June 30, 2025
- 45 requests/minute rate limit
- **Base URL**: `https://search.patentsview.org/api/v1/`
2. **PEDS (Patent Examination Data System)** - Patent examination history
- Application status and transaction history from 1981-present
- Office action dates and examination events
- Use `uspto-opendata-python` Python library
- **Replaced**: PAIR Bulk Data (PBD) - decommissioned
3. **TSDR (Trademark Status & Document Retrieval)** - Trademark data
- Trademark status, ownership, prosecution history
- Search by serial or registration number
- **Base URL**: `https://tsdrapi.uspto.gov/ts/cd/`
### Additional APIs
4. **Patent Assignment Search** - Ownership records and transfers
5. **Trademark Assignment Search** - Trademark ownership changes
6. **Enriched Citation API** - Patent citation analysis
7. **Office Action Text Retrieval** - Full text of office actions
8. **Office Action Citations** - Citations from office actions
9. **Office Action Rejection** - Rejection reasons and types
10. **PTAB API** - Patent Trial and Appeal Board proceedings
11. **Patent Litigation Cases** - Federal district court litigation data
12. **Cancer Moonshot Data Set** - Cancer-related patents
## Quick Start
### API Key Registration
All USPTO APIs require an API key. Register at:
**https://account.uspto.gov/api-manager/**
Set the API key as an environment variable:
```bash
export USPTO_API_KEY="your_api_key_here"
```
### Helper Scripts
This skill includes Python scripts for common operations:
- **`scripts/patent_search.py`** - PatentSearch API client for searching patents
- **`scripts/peds_client.py`** - PEDS client for examination history
- **`scripts/trademark_client.py`** - TSDR client for trademark data
## Task 1: Searching Patents
### Using the PatentSearch API
The PatentSearch API uses a JSON query language with various operators for flexible searching.
#### Basic Patent Search Examples
**Search by keywords in abstract:**
```python
from scripts.patent_search import PatentSearchClient
client = PatentSearchClient()
# Search for machine learning patents
results = client.search_patents({
"patent_abstract": {"_text_all": ["machine", "learning"]}
})
for patent in results['patents']:
print(f"{patent['patent_number']}: {patent['patent_title']}")
```
**Search by inventor:**
```python
results = client.search_by_inventor("John Smith")
```
**Search by assignee/company:**
```python
results = client.search_by_assignee("Google")
```
**Search by date range:**
```python
results = client.search_by_date_range("2024-01-01", "2024-12-31")
```
**Search by CPC classification:**
```python
results = client.search_by_classification("H04N") # Video/image tech
```
#### Advanced Patent Search
Combine multiple criteria with logical operators:
```python
results = client.advanced_search(
keywords=["artificial", "intelligence"],
assignee="Microsoft",
start_date="2023-01-01",
end_date="2024-12-31",
cpc_codes=["G06N", "G06F"] # AI and computing classifications
)
```
#### Direct API Usage
For complex queries, use the API directly:
```python
import requests
url = "https://search.patentsview.org/api/v1/patent"
headers = {
"X-Api-Key": "YOUR_API_KEY",
"Content-Type": "application/json"
}
query = {
"q": {
"_and": [
{"patent_date": {"_gte": "2024-01-01"}},
{"assignee_organization": {"_text_any": ["Google", "Alphabet"]}},
{"cpc_subclass_id": ["G06N", "H04N"]}
]
},
"f": ["patent_number", "patent_title", "patent_date", "inventor_name"],
"s": [{"patent_date": "desc"}],
"o": {"per_page": 100, "page": 1}
}
response = requests.post(url, headers=headers, json=query)
results = response.json()
```
### Query Operators
- **Equality**: `{"field": "value"}` or `{"field": {"_eq": "value"}}`
- **Comparison**: `_gt`, `_gte`, `_lt`, `_lte`, `_neq`
- **Text search**: `_text_all`, `_text_any`, `_text_phrase`
- **String matching**: `_begins`, `_contains`
- **Logical**: `_and`, `_or`, `_not`
**Best Practice**: Use `_text_*` operators for text fields (more performant than `_contains` or `_begins`)
### Available Patent Endpoints
- `/patent` - Granted patents
- `/publication` - Pregrant publications
- `/inventor` - Inventor information
- `/assignee` - Assignee information
- `/cpc_subclass`, `/cpc_at_issue` - CPC classifications
- `/uspc` - US Patent Classification
- `/ipc` - International Patent Classification
- `/claims`, `/brief_summary_text`, `/detail_description_text` - Text data (beta)
### Reference Documentation
See `references/patentsearch_api.md` for complete PatentSearch API documentation including:
- All available endpoints
- Complete field reference
- Query syntax and examples
- Response formats
- Rate limits and best practices
## Task 2: Retrieving Patent Examination Data
### Using PEDS (Patent Examination Data System)
PEDS provides comprehensive prosecution history including transaction events, status changes, and examination timeline.
#### Installation
```bash
pip install uspto-opendata-python
```
#### Basic PEDS Usage
**Get application data:**
```python
from scripts.peds_client import PEDSHelper
helper = PEDSHelper()
# By application number
app_data = helper.get_application("16123456")
print(f"Title: {app_data['title']}")
print(f"Status: {app_data['app_status']}")
# By patent number
patent_data = helper.get_patent("11234567")
```
**Get transaction history:**
```python
transactions = helper.get_transaction_history("16123456")
for trans in transactions:
print(f"{trans['date']}: {trans['code']} - {trans['description']}")
```
**Get office actions:**
```python
office_actions = helper.get_office_actions("16123456")
for oa in office_actions:
if oa['code'] == 'CTNF':
print(f"Non-final rejection: {oa['date']}")
elif oa['code'] == 'CTFR':
print(f"Final rejection: {oa['date']}")
elif oa['code'] == 'NOA':
print(f"Notice of allowance: {oa['date']}")
```
**Get status summary:**
```python
summary = helper.get_status_summary("16123456")
print(f"Current status: {summary['current_status']}")
print(f"Filing date: {summary['filing_date']}")
print(f"Pendency: {summary['pendency_days']} days")
if summary['is_patented']:
print(f"Patent number: {summary['patent_number']}")
print(f"Issue date: {summary['issue_date']}")
```
#### Prosecution Analysis
Analyze prosecution patterns:
```python
analysis = helper.analyze_prosecution("16123456")
print(f"Total office actions: {analysis['total_office_actions']}")
print(f"Non-final rejections: {analysis['non_final_rejections']}")
print(f"Final rejections: {analysis['final_rejections']}")
print(f"Allowed: {analysis['allowance']}")
print(f"Responses filed: {analysis['responses']}")
```
### Common Transaction Codes
- **CTNF** - Non-final rejection mailed
- **CTFR** - Final rejection mailed
- **NOA** - Notice of allowance mailed
- **WRIT** - Response filed
- **ISS.FEE** - Issue fee payment
- **ABND** - Application abandoned
- **AOPF** - Office action mailed
### Reference Documentation
See `references/peds_api.md` for complete PEDS documentation including:
- All available data fields
- Transaction code reference
- Python library usage
- Portfolio analysis examples
## Task 3: Searching and Monitoring Trademarks
### Using TSDR (Trademark Status & Document Retrieval)
Access trademark status, ownership, and prosecution history.
#### Basic Trademark Usage
**Get trademark by serial number:**
```python
from scripts.trademark_client import TrademarkClient
client = TrademarkClient()
# By serial number
tm_data = client.get_trademark_by_serial("87654321")
# By registration number
tm_data = client.get_trademark_by_registration("5678901")
```
**Get trademark status:**
```python
status = client.get_trademark_status("87654321")
print(f"Mark: {status['mark_text']}")
print(f"Status: {status['status']}")
print(f"Filing date: {status['filing_date']}")
if status['is_registered']:
print(f"Registration #: {status['registration_number']}")
print(f"Registration date: {status['registration_date']}")
```
**Check trademark health:**
```python
health = client.check_trademark_health("87654321")
print(f"Mark: {health['mark']}")
print(f"Status: {health['status']}")
for alert in health['alerts']:
print(alert)
if health['needs_attention']:
print("⚠️ This mark needs attention!")
```
#### Trademark Portfolio Monitoring
Monitor multiple trademarks:
```python
def monitor_portfolio(serial_numbers, api_key):
"""Monitor trademark portfolio health."""
client = TrademarkClient(api_key)
results = {
'active': [],
'pending': [],
'problems': []
}
for sn in serial_numbers:
health = client.check_trademark_health(sn)
if 'REGISTERED' in health['status']:
results['active'].append(health)
elif 'PENDING' in health['status'] or 'PUBLISHED' in health['status']:
results['pending'].append(health)
elif health['needs_attention']:
results['problems'].append(health)
return results
```
### Common Trademark Statuses
- **REGISTERED** - Active registered mark
- **PENDING** - Under examination
- **PUBLISHED FOR OPPOSITION** - In opposition period
- **ABANDONED** - Application abandoned
- **CANCELLED** - Registration cancelled
- **SUSPENDED** - Examination suspended
- **REGISTERED AND RENEWED** - Registration renewed
### Reference Documentation
See `references/trademark_api.md` for complete trademark API documentation including:
- TSDR API reference
- Trademark Assignment Search API
- All status codes
- Prosecution history access
- Ownership tracking
## Task 4: Tracking Assignments and Ownership
### Patent and Trademark Assignments
Both patents and trademarks have Assignment Search APIs for tracking ownership changes.
#### Patent Assignment API
**Base URL**: `https://assignment-api.uspto.gov/patent/v1.4/`
**Search by patent number:**
```python
import requests
import xml.etree.ElementTree as ET
def get_patent_assignments(patent_number, api_key):
url = f"https://assignment-api.uspto.gov/patent/v1.4/assignment/patent/{patent_number}"
headers = {"X-Api-Key": api_key}
response = requests.get(url, headers=headers)
if response.status_code == 200:
return response.text # Returns XML
assignments_xml = get_patent_assignments("11234567", api_key)
root = ET.fromstring(assignments_xml)
for assignment in root.findall('.//assignment'):
recorded_date = assignment.find('recordedDate').text
assignor = assignment.find('.//assignor/name').text
assignee = assignment.find('.//assignee/name').text
conveyance = assignment.find('conveyanceText').text
print(f"{recorded_date}: {assignor} -> {assignee}")
print(f" Type: {conveyance}\n")
```
**Search by company name:**
```python
def find_company_patents(company_name, api_key):
url = "https://assignment-api.uspto.gov/patent/v1.4/assignment/search"
headers = {"X-Api-Key": api_key}
data = {"criteria": {"assigneeName": company_name}}
response = requests.post(url, headers=headers, json=data)
return response.text
```
### Common Assignment Types
- **ASSIGNMENT OF ASSIGNORS INTEREST** - Ownership transfer
- **SECURITY AGREEMENT** - Collateral/security interest
- **MERGER** - Corporate merger
- **CHANGE OF NAME** - Name change
- **ASSIGNMENT OF PARTIAL INTEREST** - Partial ownership
## Task 5: Accessing Additional USPTO Data
### Office Actions, Citations, and Litigation
Multiple specialized APIs provide additional patent data.
#### Office Action Text Retrieval
Retrieve full text of office actions using application number. Integrate with PEDS to identify which office actions exist, then retrieve full text.
#### Enriched Citation API
Analyze patent citations:
- Forward citations (patents citing this patent)
- Backward citations (prior art cited)
- Examiner vs. applicant citations
- Citation context
#### Patent Litigation Cases API
Access federal district court patent litigation records:
- 74,623+ litigation records
- Patents asserted
- Parties and venues
- Case outcomes
#### PTAB API
Patent Trial and Appeal Board proceedings:
- Inter partes review (IPR)
- Post-grant review (PGR)
- Appeal decisions
### Reference Documentation
See `references/additional_apis.md` for comprehensive documentation on:
- Enriched Citation API
- Office Action APIs (Text, Citations, Rejections)
- Patent Litigation Cases API
- PTAB API
- Cancer Moonshot Data Set
- OCE Status/Event Codes
## Complete Analysis Example
### Comprehensive Patent Analysis
Combine multiple APIs for complete patent intelligence:
```python
def comprehensive_patent_analysis(patent_number, api_key):
"""
Full patent analysis using multiple USPTO APIs.
"""
from scripts.patent_search import PatentSearchClient
from scripts.peds_client import PEDSHelper
results = {}
# 1. Get patent details
patent_client = PatentSearchClient(api_key)
patent_data = patent_client.get_patent(patent_number)
results['patent'] = patent_data
# 2. Get examination history
peds = PEDSHelper()
results['prosecution'] = peds.analyze_prosecution(patent_number)
results['status'] = peds.get_status_summary(patent_number)
# 3. Get assignment history
import requests
assign_url = f"https://assignment-api.uspto.gov/patent/v1.4/assignment/patent/{patent_number}"
assign_resp = requests.get(assign_url, headers={"X-Api-Key": api_key})
results['assignments'] = assign_resp.text if assign_resp.status_code == 200 else None
# 4. Analyze results
print(f"\n=== Patent {patent_number} Analysis ===\n")
print(f"Title: {patent_data['patent_title']}")
print(f"Assignee: {', '.join(patent_data.get('assignee_organization', []))}")
print(f"Issue Date: {patent_data['patent_date']}")
print(f"\nProsecution:")
print(f" Office Actions: {results['prosecution']['total_office_actions']}")
print(f" Rejections: {results['prosecution']['non_final_rejections']} non-final, {results['prosecution']['final_rejections']} final")
print(f" Pendency: {results['status']['pendency_days']} days")
# Analyze citations
if 'cited_patent_number' in patent_data:
print(f"\nCitations:")
print(f" Cites: {len(patent_data['cited_patent_number'])} patents")
if 'citedby_patent_number' in patent_data:
print(f" Cited by: {len(patent_data['citedby_patent_number'])} patents")
return results
```
## Best Practices
1. **API Key Management**
- Store API key in environment variables
- Never commit keys to version control
- Use same key across all USPTO APIs
2. **Rate Limiting**
- PatentSearch: 45 requests/minute
- Implement exponential backoff for rate limit errors
- Cache responses when possible
3. **Query Optimization**
- Use `_text_*` operators for text fields (more performant)
- Request only needed fields to reduce response size
- Use date ranges to narrow searches
4. **Data Handling**
- Not all fields populated for all patents/trademarks
- Handle missing data gracefully
- Parse dates consistently
5. **Combining APIs**
- Use PatentSearch for discovery
- Use PEDS for prosecution details
- Use Assignment APIs for ownership tracking
- Combine data for comprehensive analysis
## Important Notes
- **Legacy API Sunset**: PatentsView legacy API discontinued May 1, 2025 - use PatentSearch API
- **PAIR Bulk Data Decommissioned**: Use PEDS instead
- **Data Coverage**: PatentSearch has data through June 30, 2025; PEDS from 1981-present
- **Text Endpoints**: Claims and description endpoints are in beta with ongoing backfilling
- **Rate Limits**: Respect rate limits to avoid service disruptions
## Resources
### API Documentation
- **PatentSearch API**: https://search.patentsview.org/docs/
- **USPTO Developer Portal**: https://developer.uspto.gov/
- **USPTO Open Data Portal**: https://data.uspto.gov/
- **API Key Registration**: https://account.uspto.gov/api-manager/
### Python Libraries
- **uspto-opendata-python**: https://pypi.org/project/uspto-opendata-python/
- **uspto-opendata-python documentation**: https://docs.ip-tools.org/uspto-opendata-python/
### Reference Files
- `references/patentsearch_api.md` - Complete PatentSearch API reference
- `references/peds_api.md` - PEDS API and library documentation
- `references/trademark_api.md` - Trademark APIs (TSDR and Assignment)
- `references/additional_apis.md` - Citations, Office Actions, Litigation, PTAB
### Scripts
- `scripts/patent_search.py` - PatentSearch API client
- `scripts/peds_client.py` - PEDS examination data client
- `scripts/trademark_client.py` - Trademark search client

View File

@@ -0,0 +1,394 @@
# Additional USPTO APIs Reference
## Overview
Beyond patent search, PEDS, and trademarks, USPTO provides specialized APIs for citations, office actions, assignments, litigation, and other patent data.
## 1. Enriched Citation API
### Overview
Provides insights into patent evaluation processes and cited references for the IP5 (USPTO, EPO, JPO, KIPO, CNIPA) and public use.
**Versions:** v3, v2, v1
**Base URL:** Access through USPTO Open Data Portal
### Purpose
Analyze which references examiners cite during patent examination and how patents cite prior art.
### Key Features
- **Forward citations** - Patents that cite a given patent
- **Backward citations** - References cited by a patent
- **Examiner citations** - References cited by examiner vs. applicant
- **Citation context** - How and why references are cited
### Use Cases
- Prior art analysis
- Patent landscape analysis
- Identifying related technologies
- Assessing patent strength based on citations
## 2. Office Action APIs
### 2.1 Office Action Text Retrieval API
**Version:** v1
### Purpose
Retrieves complete full-text office action correspondence documents for patent applications.
### Features
- Full text of office actions
- Restrictions, rejections, objections
- Examiner amendments
- Search information
### Example Use
```python
# Retrieve office action text by application number
def get_office_action_text(app_number, api_key):
"""
Fetch full text of office actions for an application.
Note: Integrate with PEDS to identify which office actions exist.
"""
# API implementation
pass
```
### 2.2 Office Action Citations API
**Versions:** v2, beta v1
### Purpose
Provides patent citation data extracted from office actions, showing which references examiners used during examination.
### Key Data
- Patent and non-patent literature citations
- Citation context (rejection, information, etc.)
- Examiner search strategies
- Prosecution research dataset
### 2.3 Office Action Rejection API
**Versions:** v2, beta v1
### Purpose
Details rejection reasons and examination outcomes with bulk rejection data through March 2025.
### Rejection Types
- **35 U.S.C. § 102** - Anticipation (lack of novelty)
- **35 U.S.C. § 103** - Obviousness
- **35 U.S.C. § 112** - Enablement, written description, indefiniteness
- **35 U.S.C. § 101** - Subject matter eligibility
### Use Cases
- Analyze common rejection reasons
- Identify problematic claim language
- Prepare responses based on historical data
- Portfolio analysis of rejection patterns
### 2.4 Office Action Weekly Zips API
**Version:** v1
### Purpose
Delivers bulk downloads of full-text office action documents organized by weekly release schedules.
### Features
- Weekly archive downloads
- Complete office action text
- Bulk access for large-scale analysis
## 3. Patent Assignment Search API
### Overview
**Version:** v1.4
Accesses USPTO patent assignment database for ownership records and transfers.
**Base URL:** `https://assignment-api.uspto.gov/patent/`
### Purpose
Track patent ownership, assignments, security interests, and corporate transactions.
### Search Methods
#### By Patent Number
```
GET /v1.4/assignment/patent/{patent_number}
```
#### By Application Number
```
GET /v1.4/assignment/application/{application_number}
```
#### By Assignee Name
```
POST /v1.4/assignment/search
{
"criteria": {
"assigneeName": "Company Name"
}
}
```
### Response Format
Returns XML with assignment records similar to trademark assignments:
- Reel/frame numbers
- Conveyance type
- Dates (execution and recorded)
- Assignors and assignees
- Affected patents/applications
### Common Uses
```python
def track_patent_ownership(patent_number, api_key):
"""Track ownership history of a patent."""
url = f"https://assignment-api.uspto.gov/patent/v1.4/assignment/patent/{patent_number}"
headers = {"X-Api-Key": api_key}
response = requests.get(url, headers=headers)
if response.status_code == 200:
# Parse XML to extract assignment history
return response.text
return None
def find_company_patents(company_name, api_key):
"""Find patents assigned to a company."""
url = "https://assignment-api.uspto.gov/patent/v1.4/assignment/search"
headers = {"X-Api-Key": api_key}
data = {"criteria": {"assigneeName": company_name}}
response = requests.post(url, headers=headers, json=data)
return response.text
```
## 4. PTAB API (Patent Trial and Appeal Board)
### Overview
**Version:** v2
Access to Patent Trial and Appeal Board proceedings data.
### Purpose
Retrieve information about:
- Inter partes review (IPR)
- Post-grant review (PGR)
- Covered business method (CBM) review
- Ex parte appeals
### Data Available
- Petition information
- Trial decisions
- Final written decisions
- Petitioner and patent owner information
- Claims challenged
- Trial outcomes
### Note
The PTAB API is currently migrating to the new USPTO Open Data Portal. Check the current documentation for access details.
## 5. Patent Litigation Cases API
### Overview
**Version:** v1
Contains 74,623+ district court litigation records covering patent litigation data.
### Purpose
Access federal district court patent infringement cases.
### Key Data
- Case numbers and filing dates
- Patents asserted
- Parties (plaintiffs and defendants)
- Venues
- Case outcomes
### Use Cases
- Litigation risk analysis
- Identify frequently litigated patents
- Track litigation trends
- Analyze venue preferences
- Assess patent enforcement patterns
## 6. Cancer Moonshot Patent Data Set API
### Overview
**Version:** v1.0.1
Specialized dataset for cancer-related patent discoveries.
### Purpose
Search and download patents related to cancer research, treatment, and diagnostics.
### Features
- Curated cancer-related patents
- Bulk data download
- Classification by cancer type
- Treatment modality categorization
### Use Cases
- Cancer research prior art
- Technology landscape analysis
- Identify research trends
- Licensing opportunities
## 7. OCE Patent Examination Status/Event Codes APIs
### Overview
**Version:** v1
Provides official descriptions of USPTO status and event codes used in patent examination.
### Purpose
Decode transaction codes and status codes found in PEDS and other examination data.
### Data Provided
- **Status codes** - Application status descriptions
- **Event codes** - Transaction/event descriptions
- **Code definitions** - Official meanings
### Integration
Use with PEDS data to interpret transaction codes:
```python
def get_code_description(code, api_key):
"""Get human-readable description of USPTO code."""
# Fetch from OCE API
pass
def enrich_peds_data(peds_transactions, api_key):
"""Add descriptions to PEDS transaction codes."""
for trans in peds_transactions:
trans['description'] = get_code_description(trans['code'], api_key)
return peds_transactions
```
## API Integration Patterns
### Combined Workflow Example
```python
def comprehensive_patent_analysis(patent_number, api_key):
"""
Comprehensive analysis combining multiple APIs.
"""
results = {}
# 1. Get patent details from PatentSearch
results['patent_data'] = search_patent(patent_number, api_key)
# 2. Get examination history from PEDS
results['prosecution'] = get_peds_data(patent_number, api_key)
# 3. Get assignment history
results['assignments'] = get_assignments(patent_number, api_key)
# 4. Get citation data
results['citations'] = get_citations(patent_number, api_key)
# 5. Check litigation history
results['litigation'] = get_litigation(patent_number, api_key)
# 6. Get PTAB challenges
results['ptab'] = get_ptab_proceedings(patent_number, api_key)
return results
```
### Portfolio Analysis Example
```python
def analyze_company_portfolio(company_name, api_key):
"""
Analyze a company's patent portfolio using multiple APIs.
"""
# 1. Find all assigned patents
assignments = find_company_patents(company_name, api_key)
patent_numbers = extract_patent_numbers(assignments)
# 2. Get details for each patent
portfolio = []
for patent_num in patent_numbers:
patent_data = {
'number': patent_num,
'details': search_patent(patent_num, api_key),
'citations': get_citations(patent_num, api_key),
'litigation': get_litigation(patent_num, api_key)
}
portfolio.append(patent_data)
# 3. Aggregate statistics
stats = {
'total_patents': len(portfolio),
'cited_by_count': sum(len(p['citations']) for p in portfolio),
'litigated_count': sum(1 for p in portfolio if p['litigation']),
'technology_areas': aggregate_tech_areas(portfolio)
}
return {'portfolio': portfolio, 'statistics': stats}
```
## Best Practices
1. **API Key Management** - Use environment variables, never hardcode
2. **Rate Limiting** - Implement exponential backoff for all APIs
3. **Caching** - Cache API responses to minimize redundant calls
4. **Error Handling** - Gracefully handle API errors and missing data
5. **Data Validation** - Validate input formats before API calls
6. **Combining APIs** - Use appropriate APIs together for comprehensive analysis
7. **Documentation** - Keep track of API versions and changes
## API Key Registration
All APIs require registration at:
**https://account.uspto.gov/api-manager/**
A single API key works across most USPTO APIs.
## Resources
- **Developer Portal**: https://developer.uspto.gov/
- **Open Data Portal**: https://data.uspto.gov/
- **API Catalog**: https://developer.uspto.gov/api-catalog
- **Swagger Docs**: Available for individual APIs

View File

@@ -0,0 +1,266 @@
# PatentSearch API Reference
## Overview
The PatentSearch API is USPTO's modern ElasticSearch-based patent search system that replaced the legacy PatentsView API in May 2025. It provides access to patent data through June 30, 2025, with regular updates.
**Base URL:** `https://search.patentsview.org/api/v1/`
## Authentication
All API requests require authentication using an API key in the request header:
```
X-Api-Key: YOUR_API_KEY
```
Register for an API key at: https://account.uspto.gov/api-manager/
## Rate Limits
- **45 requests per minute** per API key
- Exceeding rate limits results in HTTP 429 errors
## Available Endpoints
### Core Patent & Publication Endpoints
- **`/patent`** - General patent data (granted patents)
- **`/publication`** - Pregrant publication data
- **`/publication/rel_app_text`** - Related application data for publications
### Entity Endpoints
- **`/inventor`** - Inventor information with location and gender code fields
- **`/assignee`** - Assignee details with location identifiers
- **`/location`** - Geographic data including latitude/longitude coordinates
- **`/attorney`** - Legal representative information
### Classification Endpoints
- **`/cpc_subclass`** - Cooperative Patent Classification at subclass level
- **`/cpc_at_issue`** - CPC classification as of patent issue date
- **`/uspc`** - US Patent Classification data
- **`/wipo`** - World Intellectual Property Organization classifications
- **`/ipc`** - International Patent Classification
### Text Data Endpoints (Beta)
- **`/brief_summary_text`** - Patent brief summaries (granted and pre-grant)
- **`/claims`** - Patent claims text
- **`/drawing_description_text`** - Drawing descriptions
- **`/detail_description_text`** - Detailed description text
*Note: Text endpoints are in beta with data primarily from 2023 onward. Historical backfilling is in progress.*
### Supporting Endpoints
- **`/other_reference`** - Patent reference materials
- **`/related_document`** - Cross-references between patents
## Query Parameters
All endpoints support four main parameters:
### 1. Query String (`q`)
Filters data using JSON query objects. **Required parameter.**
**Query Operators:**
- **Equality**: `{"field": "value"}` or `{"field": {"_eq": "value"}}`
- **Not equal**: `{"field": {"_neq": "value"}}`
- **Comparison**: `_gt`, `_gte`, `_lt`, `_lte`
- **String matching**:
- `_begins` - starts with
- `_contains` - substring match
- **Full-text search** (recommended for text fields):
- `_text_all` - all terms must match
- `_text_any` - any term matches
- `_text_phrase` - exact phrase match
- **Logical operators**: `_and`, `_or`, `_not`
- **Array matching**: Use arrays for OR conditions
**Examples:**
```json
// Simple equality
{"patent_number": "11234567"}
// Date range
{"patent_date": {"_gte": "2020-01-01", "_lte": "2020-12-31"}}
// Text search (preferred for text fields)
{"patent_abstract": {"_text_all": ["machine", "learning"]}}
// Inventor name
{"inventor_name": {"_text_phrase": "John Smith"}}
// Complex query with logical operators
{
"_and": [
{"patent_date": {"_gte": "2020-01-01"}},
{"assignee_organization": {"_text_any": ["Google", "Alphabet"]}}
]
}
// Array for OR conditions
{"cpc_subclass_id": ["H04N", "H04L"]}
```
### 2. Field List (`f`)
Specifies which fields to return in the response. Optional - each endpoint has default fields.
**Format:** JSON array of field names
```json
["patent_number", "patent_title", "patent_date", "inventor_name"]
```
### 3. Sorting (`s`)
Orders results by specified fields. Optional.
**Format:** JSON array with field name and direction
```json
[{"patent_date": "desc"}]
```
### 4. Options (`o`)
Controls pagination and additional settings. Optional.
**Available options:**
- `page` - Page number (default: 1)
- `per_page` - Records per page (default: 100, max: 1,000)
- `pad_patent_id` - Pad patent IDs with leading zeros (default: false)
- `exclude_withdrawn` - Exclude withdrawn patents (default: true)
**Format:** JSON object
```json
{
"page": 1,
"per_page": 500,
"exclude_withdrawn": false
}
```
## Response Format
All responses follow this structure:
```json
{
"error": false,
"count": 100,
"total_hits": 5432,
"patents": [...],
// or "inventors": [...], "assignees": [...], etc.
}
```
- `error` - Boolean indicating if an error occurred
- `count` - Number of records in current response
- `total_hits` - Total number of matching records
- Endpoint-specific data array (e.g., `patents`, `inventors`)
## Complete Request Example
### Using curl
```bash
curl -X POST "https://search.patentsview.org/api/v1/patent" \
-H "X-Api-Key: YOUR_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"q": {
"_and": [
{"patent_date": {"_gte": "2024-01-01"}},
{"patent_abstract": {"_text_all": ["artificial", "intelligence"]}}
]
},
"f": ["patent_number", "patent_title", "patent_date", "assignee_organization"],
"s": [{"patent_date": "desc"}],
"o": {"per_page": 100}
}'
```
### Using Python
```python
import requests
url = "https://search.patentsview.org/api/v1/patent"
headers = {
"X-Api-Key": "YOUR_API_KEY",
"Content-Type": "application/json"
}
data = {
"q": {
"_and": [
{"patent_date": {"_gte": "2024-01-01"}},
{"patent_abstract": {"_text_all": ["artificial", "intelligence"]}}
]
},
"f": ["patent_number", "patent_title", "patent_date", "assignee_organization"],
"s": [{"patent_date": "desc"}],
"o": {"per_page": 100}
}
response = requests.post(url, headers=headers, json=data)
results = response.json()
```
## Common Field Names
### Patent Endpoint Fields
- `patent_number` - Patent number
- `patent_title` - Title of the patent
- `patent_date` - Grant date
- `patent_abstract` - Abstract text
- `patent_type` - Type of patent
- `inventor_name` - Inventor names (array)
- `assignee_organization` - Assignee company names (array)
- `cpc_subclass_id` - CPC classification codes
- `uspc_class` - US classification codes
- `cited_patent_number` - Citations to other patents
- `citedby_patent_number` - Patents citing this patent
Refer to the full field dictionary at: https://search.patentsview.org/docs/
## Best Practices
1. **Use `_text_*` operators for text fields** - More performant than `_contains` or `_begins`
2. **Request only needed fields** - Reduces response size and improves performance
3. **Implement pagination** - Handle large result sets efficiently
4. **Respect rate limits** - Implement backoff/retry logic for 429 errors
5. **Cache results** - Reduce redundant API calls
6. **Use date ranges** - Narrow searches to improve performance
## Error Handling
Common HTTP status codes:
- **200** - Success
- **400** - Bad request (invalid query syntax)
- **401** - Unauthorized (missing or invalid API key)
- **429** - Too many requests (rate limit exceeded)
- **500** - Server error
## Recent Updates (February 2025)
- Data updated through December 31, 2024
- New `pad_patent_id` option for formatting patent IDs
- New `exclude_withdrawn` option (default: true); set it to `false` to include withdrawn patents in results
- Text endpoints continue beta backfilling
## Resources
- **Official Documentation**: https://search.patentsview.org/docs/
- **API Key Registration**: https://account.uspto.gov/api-manager/
- **Legacy API Notice**: The old PatentsView API was discontinued May 1, 2025

View File

@@ -0,0 +1,212 @@
# Patent Examination Data System (PEDS) API Reference
## Overview
The Patent Examination Data System (PEDS) provides access to USPTO patent application and filing status records. It contains bibliographic data, published document information, and patent term extension data.
**Data Coverage:** 1981 to present (some data back to 1935)
**Base URL:** Access through USPTO Open Data Portal
## What PEDS Provides
PEDS gives comprehensive transaction history and status information for patent applications:
- **Bibliographic data** - Application numbers, filing dates, titles, inventors, assignees
- **Published documents** - Publication numbers and dates
- **Transaction history** - All examination events with dates, codes, and descriptions
- **Patent term adjustments** - PTA/PTE information
- **Application status** - Current status and status codes
- **File wrapper access** - Links to prosecution documents
## Key Features
1. **Transaction Activity** - Complete examination timeline with transaction dates, codes, and descriptions
2. **Status Information** - Current application status and status codes
3. **Bibliographic Updates** - Changes to inventors, assignees, titles over time
4. **Family Data** - Related applications and continuity data
5. **Office Action Tracking** - Mail dates and office action information
## Python Library: uspto-opendata-python
The recommended way to access PEDS is through the `uspto-opendata-python` library.
### Installation
```bash
pip install uspto-opendata-python
```
### Basic Usage
```python
from uspto.peds import PEDSClient
# Initialize client
client = PEDSClient()
# Search by application number
app_number = "16123456"
result = client.get_application(app_number)
# Access application data
print(f"Title: {result['title']}")
print(f"Filing Date: {result['filing_date']}")
print(f"Status: {result['status']}")
# Get transaction history
transactions = result['transactions']
for trans in transactions:
print(f"{trans['date']}: {trans['code']} - {trans['description']}")
```
### Search Methods
```python
# By application number
client.get_application("16123456")
# By patent number
client.get_patent("11234567")
# By customer number (assignee)
client.search_by_customer_number("12345")
# Bulk retrieval
app_numbers = ["16123456", "16123457", "16123458"]
results = client.bulk_retrieve(app_numbers)
```
## Data Fields
### Bibliographic Fields
- `application_number` - Application number
- `filing_date` - Filing date
- `patent_number` - Patent number (if granted)
- `patent_issue_date` - Issue date (if granted)
- `title` - Application/patent title
- `inventors` - List of inventors
- `assignees` - List of assignees
- `app_type` - Application type (utility, design, plant, reissue)
- `app_status` - Current application status
- `app_status_date` - Status date
### Transaction Fields
- `transaction_date` - Date of transaction
- `transaction_code` - USPTO event code
- `transaction_description` - Description of event
- `mail_date` - Mail room date (for office actions)
### Patent Term Data
- `pta_pte_summary` - Patent term adjustment/extension summary
- `pta_pte_history` - History of term calculations
## Status Codes
Common application status codes:
- **Patented Case** - Patent has been granted
- **Abandoned** - Application is abandoned
- **Pending** - Application is under examination
- **Allowed** - Application has been allowed, awaiting issue
- **Final Rejection** - Final rejection issued
- **Non-Final Rejection** - Non-final rejection issued
- **Response Filed** - Applicant response filed
## Transaction Codes
Common transaction codes include:
- **CTNF** - Non-final rejection mailed
- **CTFR** - Final rejection mailed
- **AOPF** - Office action mailed
- **WRIT** - Response filed
- **NOA** - Notice of allowance mailed
- **ISS.FEE** - Issue fee payment
- **ABND** - Application abandoned
The full code list is available in the OCE Patent Examination Status/Event Codes API.
## Use Cases
### 1. Track Application Progress
Monitor pending applications for office actions and status changes.
```python
# Get current status
app = client.get_application("16123456")
print(f"Current status: {app['app_status']}")
print(f"Status date: {app['app_status_date']}")
# Check for recent office actions
recent_oas = [t for t in app['transactions']
if t['code'] in ['CTNF', 'CTFR', 'AOPF']
and t['date'] > '2024-01-01']
```
### 2. Portfolio Analysis
Analyze prosecution history across a portfolio.
```python
from datetime import datetime

# Get all applications for an assignee
apps = client.search_by_customer_number("12345")
# Calculate average pendency
pendencies = []
for app in apps:
if app['patent_issue_date']:
filing = datetime.strptime(app['filing_date'], '%Y-%m-%d')
issue = datetime.strptime(app['patent_issue_date'], '%Y-%m-%d')
pendencies.append((issue - filing).days)
avg_pendency = sum(pendencies) / len(pendencies)
print(f"Average pendency: {avg_pendency} days")
```
### 3. Examine Rejection Patterns
Analyze types of rejections received.
```python
# Count rejection types
rejections = {}
for trans in app['transactions']:
if 'rejection' in trans['description'].lower():
code = trans['code']
rejections[code] = rejections.get(code, 0) + 1
```
## Integration with Other APIs
PEDS data can be combined with other USPTO APIs:
- **Office Action Text API** - Retrieve full text of office actions using application number
- **Patent Assignment Search** - Find ownership changes
- **PTAB API** - Check for appeal proceedings
## Important Notes
1. **PAIR Bulk Data (PBD) is decommissioned** - Use PEDS instead
2. **Data updates** - PEDS is updated regularly but may have 1-2 day lag
3. **Application numbers** - Use standardized format (no slashes or spaces)
4. **Continuity data** - Parent/child applications tracked in transaction history
## Best Practices
1. **Batch requests** - Use bulk retrieval for multiple applications
2. **Cache data** - Avoid redundant API calls for same application
3. **Monitor updates** - Check for transaction updates regularly
4. **Handle missing data** - Not all fields populated for all applications
5. **Parse transaction codes** - Use code descriptions for user-friendly display
## Resources
- **Library Documentation**: https://docs.ip-tools.org/uspto-opendata-python/
- **PyPI Package**: https://pypi.org/project/uspto-opendata-python/
- **GitHub Repository**: https://github.com/ip-tools/uspto-opendata-python
- **USPTO PEDS Portal**: https://ped.uspto.gov/

View File

@@ -0,0 +1,358 @@
# USPTO Trademark APIs Reference
## Overview
USPTO provides two main APIs for trademark data:
1. **Trademark Status & Document Retrieval (TSDR)** - Retrieve trademark case status and documents
2. **Trademark Assignment Search** - Search trademark assignment records
## 1. Trademark Status & Document Retrieval (TSDR) API
### Overview
TSDR enables programmatic retrieval of trademark case status documents and information.
**API Version:** v1.0
**Base URL:** `https://tsdrapi.uspto.gov/ts/cd/`
### Authentication
Requires API key registration at: https://account.uspto.gov/api-manager/
Include API key in request header:
```
X-Api-Key: YOUR_API_KEY
```
### Endpoints
#### Get Trademark Status by Serial Number
```
GET /ts/cd/casedocs/sn{serial_number}/info.json
```
**Example:**
```bash
curl -H "X-Api-Key: YOUR_KEY" \
"https://tsdrapi.uspto.gov/ts/cd/casedocs/sn87654321/info.json"
```
#### Get Trademark Status by Registration Number
```
GET /ts/cd/casedocs/rn{registration_number}/info.json
```
### Response Format
Returns JSON with comprehensive trademark information:
```json
{
"TradeMarkAppln": {
"ApplicationNumber": "87654321",
"ApplicationDate": "2017-10-15",
"RegistrationNumber": "5678901",
"RegistrationDate": "2019-03-12",
"MarkVerbalElementText": "EXAMPLE MARK",
"MarkCurrentStatusExternalDescriptionText": "REGISTERED",
"MarkCurrentStatusDate": "2019-03-12",
"GoodsAndServices": [...],
"Owners": [...],
"Correspondents": [...]
}
}
```
### Key Data Fields
- **Application Information:**
- `ApplicationNumber` - Serial number
- `ApplicationDate` - Filing date
- `ApplicationType` - Type (TEAS Plus, TEAS Standard, etc.)
- **Registration Information:**
- `RegistrationNumber` - Registration number (if registered)
- `RegistrationDate` - Registration date
- **Mark Information:**
- `MarkVerbalElementText` - Text of the mark
- `MarkCurrentStatusExternalDescriptionText` - Current status
- `MarkCurrentStatusDate` - Status date
- `MarkDrawingCode` - Type of mark (words, design, etc.)
- **Classification:**
- `GoodsAndServices` - Array of goods/services with classes
- **Owner Information:**
- `Owners` - Array of trademark owners/applicants
- **Prosecution History:**
- `ProsecutionHistoryEntry` - Array of events in prosecution
### Common Status Values
- **REGISTERED** - Mark is registered and active
- **PENDING** - Application under examination
- **ABANDONED** - Application/registration abandoned
- **CANCELLED** - Registration cancelled
- **SUSPENDED** - Examination suspended
- **PUBLISHED FOR OPPOSITION** - Published, in opposition period
- **REGISTERED AND RENEWED** - Registration renewed
### Python Example
```python
import requests
def get_trademark_status(serial_number, api_key):
"""Retrieve trademark status by serial number."""
url = f"https://tsdrapi.uspto.gov/ts/cd/casedocs/sn{serial_number}/info.json"
headers = {"X-Api-Key": api_key}
response = requests.get(url, headers=headers)
if response.status_code == 200:
return response.json()
else:
raise Exception(f"API error: {response.status_code}")
# Usage
data = get_trademark_status("87654321", "YOUR_API_KEY")
trademark = data['TradeMarkAppln']
print(f"Mark: {trademark['MarkVerbalElementText']}")
print(f"Status: {trademark['MarkCurrentStatusExternalDescriptionText']}")
print(f"Application Date: {trademark['ApplicationDate']}")
if 'RegistrationNumber' in trademark:
print(f"Registration #: {trademark['RegistrationNumber']}")
```
## 2. Trademark Assignment Search API
### Overview
Retrieves trademark assignment records from the USPTO assignment database. Shows ownership transfers and security interests.
**API Version:** v1.4
**Base URL:** `https://assignment-api.uspto.gov/trademark/`
### Authentication
Requires API key in header:
```
X-Api-Key: YOUR_API_KEY
```
### Search Methods
#### By Registration Number
```
GET /v1.4/assignment/application/{registration_number}
```
#### By Serial Number
```
GET /v1.4/assignment/application/{serial_number}
```
#### By Assignee Name
```
POST /v1.4/assignment/search
```
**Request body:**
```json
{
"criteria": {
"assigneeName": "Company Name"
}
}
```
### Response Format
Returns XML containing assignment records:
```xml
<assignments>
<assignment>
<reelFrame>12345/0678</reelFrame>
<conveyanceText>ASSIGNMENT OF ASSIGNORS INTEREST</conveyanceText>
<recordedDate>2020-01-15</recordedDate>
<executionDate>2020-01-10</executionDate>
<assignors>
<assignor>
<name>Original Owner LLC</name>
</assignor>
</assignors>
<assignees>
<assignee>
<name>New Owner Corporation</name>
</assignee>
</assignees>
</assignment>
</assignments>
```
### Key Fields
- `reelFrame` - USPTO reel and frame number
- `conveyanceText` - Type of transaction
- `recordedDate` - Date recorded at USPTO
- `executionDate` - Date document was executed
- `assignors` - Original owners
- `assignees` - New owners
- `propertyNumbers` - Affected serial/registration numbers
### Common Conveyance Types
- **ASSIGNMENT OF ASSIGNORS INTEREST** - Ownership transfer
- **SECURITY AGREEMENT** - Collateral/security interest
- **MERGER** - Corporate merger
- **CHANGE OF NAME** - Name change
- **ASSIGNMENT OF PARTIAL INTEREST** - Partial ownership transfer
### Python Example
```python
import requests
import xml.etree.ElementTree as ET
def search_trademark_assignments(registration_number, api_key):
"""Search assignments for a trademark registration."""
url = f"https://assignment-api.uspto.gov/trademark/v1.4/assignment/application/{registration_number}"
headers = {"X-Api-Key": api_key}
response = requests.get(url, headers=headers)
if response.status_code == 200:
return response.text # Returns XML
else:
raise Exception(f"API error: {response.status_code}")
# Usage
xml_data = search_trademark_assignments("5678901", "YOUR_API_KEY")
root = ET.fromstring(xml_data)
for assignment in root.findall('.//assignment'):
reel_frame = assignment.find('reelFrame').text
recorded_date = assignment.find('recordedDate').text
conveyance = assignment.find('conveyanceText').text
assignor = assignment.find('.//assignor/name').text
assignee = assignment.find('.//assignee/name').text
print(f"{recorded_date}: {assignor} -> {assignee}")
print(f" Type: {conveyance}")
print(f" Reel/Frame: {reel_frame}\n")
```
## Use Cases
### 1. Monitor Trademark Status
Check status of pending applications or registrations:
```python
def check_trademark_health(serial_number, api_key):
"""Check if trademark needs attention."""
data = get_trademark_status(serial_number, api_key)
tm = data['TradeMarkAppln']
status = tm['MarkCurrentStatusExternalDescriptionText']
alerts = []
if 'ABANDON' in status:
alerts.append("⚠️ ABANDONED")
elif 'PUBLISHED' in status:
alerts.append("📢 In opposition period")
elif 'SUSPENDED' in status:
alerts.append("⏸️ Examination suspended")
elif 'REGISTERED' in status:
alerts.append("✅ Active")
return alerts
```
### 2. Track Ownership Changes
Monitor assignment records for ownership changes:
```python
def get_current_owner(registration_number, api_key):
"""Find current trademark owner from assignment records."""
xml_data = search_trademark_assignments(registration_number, api_key)
root = ET.fromstring(xml_data)
assignments = []
for assignment in root.findall('.//assignment'):
date = assignment.find('recordedDate').text
assignee = assignment.find('.//assignee/name').text
assignments.append((date, assignee))
# Most recent assignment
if assignments:
assignments.sort(reverse=True)
return assignments[0][1]
return None
```
### 3. Portfolio Management
Analyze trademark portfolio:
```python
def analyze_portfolio(serial_numbers, api_key):
"""Analyze status of multiple trademarks."""
results = {
'active': 0,
'pending': 0,
'abandoned': 0,
'expired': 0
}
for sn in serial_numbers:
data = get_trademark_status(sn, api_key)
status = data['TradeMarkAppln']['MarkCurrentStatusExternalDescriptionText']
if 'REGISTERED' in status:
results['active'] += 1
elif 'PENDING' in status or 'PUBLISHED' in status:
results['pending'] += 1
elif 'ABANDON' in status:
results['abandoned'] += 1
elif 'EXPIRED' in status or 'CANCELLED' in status:
results['expired'] += 1
return results
```
## Rate Limits and Best Practices
1. **Respect rate limits** - Implement retry logic with exponential backoff
2. **Cache responses** - Trademark data changes infrequently
3. **Batch processing** - Spread requests over time for large portfolios
4. **Error handling** - Handle missing data gracefully (not all marks have all fields)
5. **Data validation** - Verify serial/registration numbers before API calls
## Integration with Other Data
Combine trademark data with other sources:
- **TSDR + Assignment** - Current status + ownership history
- **Multiple marks** - Analyze related marks in a family
- **Patent data** - Cross-reference IP portfolio
## Resources
- **TSDR API**: https://developer.uspto.gov/api-catalog/tsdr-data-api
- **Assignment API**: https://developer.uspto.gov/api-catalog/trademark-assignment-search-data-api
- **API Key Registration**: https://account.uspto.gov/api-manager/
- **Trademark Search**: https://tmsearch.uspto.gov/
- **Swagger Documentation**: https://developer.uspto.gov/swagger/tsdr-api-v1

View File

@@ -0,0 +1,290 @@
#!/usr/bin/env python3
"""
USPTO PatentSearch API Helper
Provides functions for searching and retrieving patent data using the USPTO
PatentSearch API (ElasticSearch-based system, replaced legacy PatentsView in May 2025).
Requires:
- requests library: pip install requests
- USPTO API key from https://account.uspto.gov/api-manager/
Environment variables:
USPTO_API_KEY - Your USPTO API key
"""
import os
import sys
import json
import requests
from typing import Dict, List, Optional, Any
from datetime import datetime
class PatentSearchClient:
    """Client for USPTO PatentSearch API.

    Every call is sent as a JSON POST to ``BASE_URL/<endpoint>`` with the API
    key in the ``X-Api-Key`` header.

    NOTE(review): query operators, field names and option names are passed
    through to the server unvalidated -- confirm them against the current
    PatentSearch API reference.
    """

    BASE_URL = "https://search.patentsview.org/api/v1"

    # Seconds to wait on the server; without a timeout a stalled connection
    # would block the caller indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize client with API key.

        Args:
            api_key: USPTO API key (if not provided, uses USPTO_API_KEY env var)

        Raises:
            ValueError: If no key is passed and USPTO_API_KEY is unset or empty.
        """
        self.api_key = api_key or os.getenv("USPTO_API_KEY")
        if not self.api_key:
            raise ValueError("API key required. Set USPTO_API_KEY environment variable or pass to constructor.")

        self.headers = {
            "X-Api-Key": self.api_key,
            "Content-Type": "application/json"
        }

    def _request(self, endpoint: str, query: Dict, fields: Optional[List[str]] = None,
                 sort: Optional[List[Dict]] = None, options: Optional[Dict] = None) -> Dict:
        """
        Make a request to the PatentSearch API.

        Args:
            endpoint: API endpoint (e.g., "patent", "inventor")
            query: Query dictionary (sent as "q")
            fields: List of fields to return (sent as "f" when given)
            sort: Sort specification (sent as "s" when given)
            options: Pagination and other options (sent as "o" when given)

        Returns:
            API response as dictionary

        Raises:
            requests.exceptions.HTTPError: If the server answers with an error status.
            requests.exceptions.Timeout: If no response arrives within REQUEST_TIMEOUT seconds.
        """
        url = f"{self.BASE_URL}/{endpoint}"

        data: Dict[str, Any] = {"q": query}
        if fields:
            data["f"] = fields
        if sort:
            data["s"] = sort
        if options:
            data["o"] = options

        response = requests.post(url, headers=self.headers, json=data,
                                 timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def search_patents(self, query: Dict, fields: Optional[List[str]] = None,
                       sort: Optional[List[Dict]] = None, page: int = 1,
                       per_page: int = 100) -> Dict:
        """
        Search for patents.

        Args:
            query: Query dictionary (see PatentSearch API docs for syntax)
            fields: Fields to return (defaults to essential fields)
            sort: Sort specification (defaults to newest patent_date first)
            page: Page number
            per_page: Results per page (values above 1000 are clamped to 1000)

        Returns:
            Search results as returned by the API

        Example:
            # Search by keyword
            results = client.search_patents({
                "patent_abstract": {"_text_all": ["machine", "learning"]}
            })

            # Search by date range
            results = client.search_patents({
                "patent_date": {"_gte": "2024-01-01", "_lte": "2024-12-31"}
            })
        """
        if fields is None:
            fields = [
                "patent_number", "patent_title", "patent_date",
                "patent_abstract", "assignee_organization",
                "inventor_name", "cpc_subclass_id"
            ]

        if sort is None:
            sort = [{"patent_date": "desc"}]

        # NOTE(review): pagination is sent as "page"/"per_page"; confirm these
        # option names against the current PatentSearch API options reference.
        options = {"page": page, "per_page": min(per_page, 1000)}

        return self._request("patent", query, fields, sort, options)

    def get_patent(self, patent_number: str) -> Optional[Dict]:
        """
        Get details for a specific patent by number.

        Args:
            patent_number: Patent number (with or without commas)

        Returns:
            Patent data dictionary or None if not found
        """
        # Normalise "11,234,567" / " 11234567 " to the bare number.
        patent_number = patent_number.replace(",", "").strip()

        query = {"patent_number": patent_number}
        fields = [
            "patent_number", "patent_title", "patent_date", "patent_abstract",
            "patent_type", "inventor_name", "assignee_organization",
            "cpc_subclass_id", "cited_patent_number", "citedby_patent_number"
        ]

        result = self._request("patent", query, fields)

        if result.get("patents"):
            return result["patents"][0]
        return None

    def search_by_inventor(self, inventor_name: str, **kwargs) -> Dict:
        """
        Search patents by inventor name.

        Args:
            inventor_name: Inventor name (matched as a phrase via _text_phrase)
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {"inventor_name": {"_text_phrase": inventor_name}}
        return self.search_patents(query, **kwargs)

    def search_by_assignee(self, assignee_name: str, **kwargs) -> Dict:
        """
        Search patents by assignee/company name.

        The name is split on whitespace and the words are sent with the
        _text_any operator.

        Args:
            assignee_name: Assignee/company name
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {"assignee_organization": {"_text_any": assignee_name.split()}}
        return self.search_patents(query, **kwargs)

    def search_by_classification(self, cpc_code: str, **kwargs) -> Dict:
        """
        Search patents by CPC classification code.

        Args:
            cpc_code: CPC subclass code (e.g., "H04N", "G06F")
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {"cpc_subclass_id": cpc_code}
        return self.search_patents(query, **kwargs)

    def search_by_date_range(self, start_date: str, end_date: str, **kwargs) -> Dict:
        """
        Search patents by date range.

        Args:
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results
        """
        query = {
            "patent_date": {
                "_gte": start_date,
                "_lte": end_date
            }
        }
        return self.search_patents(query, **kwargs)

    def advanced_search(self, keywords: List[str], assignee: Optional[str] = None,
                        start_date: Optional[str] = None, end_date: Optional[str] = None,
                        cpc_codes: Optional[List[str]] = None, **kwargs) -> Dict:
        """
        Perform advanced search with multiple criteria.

        Each supplied criterion becomes one condition; two or more conditions
        are combined under "_and", a single condition is sent on its own.

        Args:
            keywords: List of keywords to search in the abstract
            assignee: Assignee/company name
            start_date: Start date (YYYY-MM-DD); may be given without end_date
            end_date: End date (YYYY-MM-DD); may be given without start_date
            cpc_codes: List of CPC classification codes
            **kwargs: Additional parameters forwarded to search_patents()

        Returns:
            Search results

        Raises:
            ValueError: If no criterion at all is supplied.
        """
        conditions: List[Dict[str, Any]] = []

        # Keyword search in abstract
        if keywords:
            conditions.append({
                "patent_abstract": {"_text_all": keywords}
            })

        # Assignee filter
        if assignee:
            conditions.append({
                "assignee_organization": {"_text_any": assignee.split()}
            })

        # Date range: honour an open-ended range instead of silently dropping
        # the filter when only one bound is supplied.
        date_range: Dict[str, str] = {}
        if start_date:
            date_range["_gte"] = start_date
        if end_date:
            date_range["_lte"] = end_date
        if date_range:
            conditions.append({"patent_date": date_range})

        # CPC classification
        if cpc_codes:
            conditions.append({
                "cpc_subclass_id": cpc_codes
            })

        if not conditions:
            raise ValueError("At least one search criterion is required.")

        query = {"_and": conditions} if len(conditions) > 1 else conditions[0]

        return self.search_patents(query, **kwargs)
def main():
    """Command-line interface for patent search.

    Reads ``sys.argv``: a bare patent number looks that patent up, while
    ``--inventor``, ``--assignee`` and ``--keywords`` run the matching search
    with the remaining arguments as search terms. Results are printed as
    indented JSON on stdout.

    Exits with status 1 on missing arguments, an unknown patent number, or any
    error raised while building the client or querying the API.
    """
    args = sys.argv[1:]
    search_flags = ("--inventor", "--assignee", "--keywords")

    # A search flag without terms would otherwise send an empty query, so it
    # is treated the same as calling the script without arguments.
    if not args or (args[0] in search_flags and len(args) < 2):
        print("Usage:")
        print("  python patent_search.py <patent_number>")
        print("  python patent_search.py --inventor <name>")
        print("  python patent_search.py --assignee <company>")
        print("  python patent_search.py --keywords <word1> <word2> ...")
        sys.exit(1)

    mode, terms = args[0], args[1:]

    try:
        # Built inside the try so a missing API key is reported as a normal
        # error message rather than a traceback.
        client = PatentSearchClient()

        if mode == "--inventor":
            results = client.search_by_inventor(" ".join(terms))
        elif mode == "--assignee":
            results = client.search_by_assignee(" ".join(terms))
        elif mode == "--keywords":
            query = {"patent_abstract": {"_text_all": terms}}
            results = client.search_patents(query)
        else:
            # Assume patent number
            patent = client.get_patent(mode)
            if not patent:
                print(f"Patent {mode} not found", file=sys.stderr)
                # SystemExit is not an Exception subclass, so it passes
                # through the handler below untouched.
                sys.exit(1)
            results = {"patents": [patent], "count": 1, "total_hits": 1}

        # Print results
        print(json.dumps(results, indent=2))

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,253 @@
#!/usr/bin/env python3
"""
USPTO Patent Examination Data System (PEDS) Helper
Provides functions for retrieving patent examination data using the
uspto-opendata-python library.
Requires:
- uspto-opendata-python: pip install uspto-opendata-python
Note: This script provides a simplified interface to PEDS data.
For full functionality, use the uspto-opendata-python library directly.
"""
import sys
import json
from typing import Dict, List, Optional, Any
from datetime import datetime
try:
from uspto.peds import PEDSClient as OriginalPEDSClient
HAS_USPTO_LIB = True
except ImportError:
HAS_USPTO_LIB = False
print("Warning: uspto-opendata-python not installed.", file=sys.stderr)
print("Install with: pip install uspto-opendata-python", file=sys.stderr)
class PEDSHelper:
    """Helper class for accessing PEDS data.

    Wraps the client imported from ``uspto.peds`` and adds convenience
    summaries on top of the application record it returns.

    NOTE(review): the record is assumed to be a dict with keys such as
    ``filing_date``, ``app_status`` and ``transactions`` (each transaction
    carrying a ``code``) -- _format_application_data() is still a pass-through,
    so confirm this against what the library actually returns.
    """

    # Transaction codes reported by get_office_actions().
    _OFFICE_ACTION_CODES = ('CTNF', 'CTFR', 'AOPF', 'NOA')

    def __init__(self):
        """Initialize PEDS client.

        Raises:
            ImportError: If uspto-opendata-python could not be imported.
        """
        if not HAS_USPTO_LIB:
            raise ImportError("uspto-opendata-python library required")
        self.client = OriginalPEDSClient()

    def get_application(self, application_number: str) -> Optional[Dict]:
        """
        Get patent application data by application number.

        Args:
            application_number: Application number (e.g., "16123456")

        Returns:
            Application data as produced by _format_application_data(), or
            None if the lookup raised (the error is reported on stderr).
        """
        try:
            result = self.client.get_application(application_number)
            return self._format_application_data(result)
        except Exception as e:
            # Deliberate best-effort: report and signal "no data" to callers.
            print(f"Error retrieving application {application_number}: {e}", file=sys.stderr)
            return None

    def get_patent(self, patent_number: str) -> Optional[Dict]:
        """
        Get patent data by patent number.

        Args:
            patent_number: Patent number (e.g., "11234567")

        Returns:
            Patent data dictionary, or None if the lookup raised (the error is
            reported on stderr).
        """
        try:
            result = self.client.get_patent(patent_number)
            return self._format_application_data(result)
        except Exception as e:
            print(f"Error retrieving patent {patent_number}: {e}", file=sys.stderr)
            return None

    def get_transaction_history(self, application_number: str) -> List[Dict]:
        """
        Get transaction history for an application.

        Args:
            application_number: Application number

        Returns:
            The record's 'transactions' list, or an empty list when the
            application could not be retrieved or has no transactions.
        """
        app_data = self.get_application(application_number)
        if not app_data:
            return []
        # "or []" keeps the declared list return type even if the key holds None.
        return app_data.get('transactions') or []

    def get_office_actions(self, application_number: str) -> List[Dict]:
        """
        Get office actions for an application.

        Args:
            application_number: Application number

        Returns:
            Transactions whose 'code' is one of CTNF, CTFR, AOPF or NOA, in
            their original order.
        """
        transactions = self.get_transaction_history(application_number)
        return [
            trans for trans in transactions
            if trans.get('code') in self._OFFICE_ACTION_CODES
        ]

    def get_status_summary(self, application_number: str) -> Dict[str, Any]:
        """
        Get a summary of application status.

        Args:
            application_number: Application number

        Returns:
            Empty dict if the application could not be retrieved, otherwise a
            dictionary with:
                - current_status: Value of 'app_status'
                - filing_date: Filing date
                - status_date: Value of 'app_status_date'
                - is_patented: True when a non-empty patent number is present
                - patent_number: Patent number if present
                - issue_date: Value of 'patent_issue_date'
                - pendency_days: Days from filing date until now, or None when
                  the filing date is missing or not in YYYY-MM-DD form
                - title, inventors, assignees: Copied from the record
        """
        app_data = self.get_application(application_number)
        if not app_data:
            return {}
        return self._summarize(app_data)

    def analyze_prosecution(self, application_number: str) -> Dict[str, Any]:
        """
        Analyze prosecution history.

        The application is fetched once and used for both the transaction
        counts and the status fields.

        Args:
            application_number: Application number

        Returns:
            Empty dict if the application has no transactions (or could not be
            retrieved), otherwise a dictionary with:
                - total_office_actions: Count of CTNF, CTFR, AOPF and OA events
                - non_final_rejections: Count of CTNF events
                - final_rejections: Count of CTFR events
                - allowance: True if a NOA event is present
                - responses: Count of WRIT events
                - abandonment: True if an ABND event is present
                - status: Current application status
                - pendency_days: Days from filing date until now (may be None)
        """
        app_data = self.get_application(application_number)
        if not app_data:
            return {}

        transactions = app_data.get('transactions') or []
        if not transactions:
            return {}

        analysis = {
            'total_office_actions': 0,
            'non_final_rejections': 0,
            'final_rejections': 0,
            'allowance': False,
            'responses': 0,
            'abandonment': False
        }

        for trans in transactions:
            code = trans.get('code', '')
            if code == 'CTNF':
                analysis['non_final_rejections'] += 1
                analysis['total_office_actions'] += 1
            elif code == 'CTFR':
                analysis['final_rejections'] += 1
                analysis['total_office_actions'] += 1
            elif code in ['AOPF', 'OA']:
                analysis['total_office_actions'] += 1
            elif code == 'NOA':
                analysis['allowance'] = True
            elif code == 'WRIT':
                analysis['responses'] += 1
            elif code == 'ABND':
                analysis['abandonment'] = True

        summary = self._summarize(app_data)
        analysis['status'] = summary.get('current_status')
        analysis['pendency_days'] = summary.get('pendency_days')

        return analysis

    @staticmethod
    def _summarize(app_data: Dict) -> Dict[str, Any]:
        """Build the status-summary dictionary from an already-fetched record."""
        filing_date = app_data.get('filing_date')
        pendency_days = None
        if filing_date:
            try:
                filing_dt = datetime.strptime(filing_date, '%Y-%m-%d')
            except (TypeError, ValueError):
                # Unexpected date format: leave pendency unknown rather than
                # failing the whole summary.
                pendency_days = None
            else:
                pendency_days = (datetime.now() - filing_dt).days

        patent_number = app_data.get('patent_number')

        return {
            'current_status': app_data.get('app_status'),
            'filing_date': filing_date,
            'status_date': app_data.get('app_status_date'),
            # bool() so an empty-string placeholder is not reported as granted.
            'is_patented': bool(patent_number),
            'patent_number': patent_number,
            'issue_date': app_data.get('patent_issue_date'),
            'pendency_days': pendency_days,
            'title': app_data.get('title'),
            'inventors': app_data.get('inventors', []),
            'assignees': app_data.get('assignees', [])
        }

    def _format_application_data(self, raw_data: Dict) -> Dict:
        """Format raw PEDS data into cleaner structure."""
        # This is a placeholder - actual implementation depends on
        # the structure returned by uspto-opendata-python
        return raw_data
def main():
    """Command-line interface for PEDS data.

    Reads ``sys.argv``: a bare application number prints the application
    record, ``--patent`` looks up by patent number, ``--status`` prints the
    status summary and ``--analyze`` prints the prosecution analysis. Output
    is indented JSON on stdout.

    Exits with status 1 on missing arguments, when uspto-opendata-python is
    not installed, when no data is returned, or on any error during lookup.
    """
    args = sys.argv[1:]
    value_flags = ("--patent", "--status", "--analyze")

    # Every flag needs a number after it; catching that here gives the usage
    # text instead of a "list index out of range" error later on.
    if not args or (args[0] in value_flags and len(args) < 2):
        print("Usage:")
        print("  python peds_client.py <application_number>")
        print("  python peds_client.py --patent <patent_number>")
        print("  python peds_client.py --status <application_number>")
        print("  python peds_client.py --analyze <application_number>")
        sys.exit(1)

    if not HAS_USPTO_LIB:
        print("Error: uspto-opendata-python library not installed", file=sys.stderr)
        print("Install with: pip install uspto-opendata-python", file=sys.stderr)
        sys.exit(1)

    try:
        # Constructed inside the try so a failure while creating the
        # underlying client is reported like any other error.
        helper = PEDSHelper()

        if args[0] == "--patent":
            result = helper.get_patent(args[1])
        elif args[0] == "--status":
            result = helper.get_status_summary(args[1])
        elif args[0] == "--analyze":
            result = helper.analyze_prosecution(args[1])
        else:
            result = helper.get_application(args[0])

        if result:
            print(json.dumps(result, indent=2))
        else:
            print("No data found", file=sys.stderr)
            # SystemExit is not an Exception subclass, so it is not swallowed
            # by the handler below.
            sys.exit(1)

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,263 @@
#!/usr/bin/env python3
"""
USPTO Trademark API Helper
Provides functions for searching and retrieving trademark data using USPTO
Trademark Status & Document Retrieval (TSDR) API.
Requires:
- requests library: pip install requests
- USPTO API key from https://account.uspto.gov/api-manager/
Environment variables:
USPTO_API_KEY - Your USPTO API key
"""
import os
import sys
import json
import requests
from typing import Dict, List, Optional, Any
class TrademarkClient:
"""Client for USPTO Trademark APIs."""
TSDR_BASE_URL = "https://tsdrapi.uspto.gov/ts/cd"
ASSIGNMENT_BASE_URL = "https://assignment-api.uspto.gov/trademark"
def __init__(self, api_key: Optional[str] = None):
"""
Initialize client with API key.
Args:
api_key: USPTO API key (if not provided, uses USPTO_API_KEY env var)
"""
self.api_key = api_key or os.getenv("USPTO_API_KEY")
if not self.api_key:
raise ValueError("API key required. Set USPTO_API_KEY environment variable or pass to constructor.")
self.headers = {"X-Api-Key": self.api_key}
def get_trademark_by_serial(self, serial_number: str) -> Optional[Dict]:
"""
Get trademark information by serial number.
Args:
serial_number: Trademark serial number (e.g., "87654321")
Returns:
Trademark data dictionary or None if not found
"""
url = f"{self.TSDR_BASE_URL}/casedocs/sn{serial_number}/info.json"
try:
response = requests.get(url, headers=self.headers)
response.raise_for_status()
return response.json()
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
return None
raise
def get_trademark_by_registration(self, registration_number: str) -> Optional[Dict]:
"""
Get trademark information by registration number.
Args:
registration_number: Trademark registration number (e.g., "5678901")
Returns:
Trademark data dictionary or None if not found
"""
url = f"{self.TSDR_BASE_URL}/casedocs/rn{registration_number}/info.json"
try:
response = requests.get(url, headers=self.headers)
response.raise_for_status()
return response.json()
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
return None
raise
def get_trademark_status(self, serial_or_registration: str) -> Dict[str, Any]:
"""
Get current status summary for a trademark.
Args:
serial_or_registration: Serial or registration number
Returns:
Status summary dictionary with:
- mark_text: Text of the mark
- status: Current status
- filing_date: Application filing date
- registration_number: Registration number if registered
- registration_date: Registration date if registered
"""
# Try serial number first
data = self.get_trademark_by_serial(serial_or_registration)
# If not found, try registration number
if not data:
data = self.get_trademark_by_registration(serial_or_registration)
if not data:
return {}
tm = data.get('TradeMarkAppln', {})
return {
'mark_text': tm.get('MarkVerbalElementText'),
'status': tm.get('MarkCurrentStatusExternalDescriptionText'),
'status_date': tm.get('MarkCurrentStatusDate'),
'filing_date': tm.get('ApplicationDate'),
'application_number': tm.get('ApplicationNumber'),
'registration_number': tm.get('RegistrationNumber'),
'registration_date': tm.get('RegistrationDate'),
'mark_drawing_code': tm.get('MarkDrawingCode'),
'is_registered': tm.get('RegistrationNumber') is not None
}
def get_goods_and_services(self, serial_or_registration: str) -> List[Dict]:
"""
Get goods and services classification for a trademark.
Args:
serial_or_registration: Serial or registration number
Returns:
List of goods/services entries with classes
"""
data = self.get_trademark_by_serial(serial_or_registration)
if not data:
data = self.get_trademark_by_registration(serial_or_registration)
if not data:
return []
tm = data.get('TradeMarkAppln', {})
return tm.get('GoodsAndServices', [])
def get_owner_info(self, serial_or_registration: str) -> List[Dict]:
"""
Get owner/applicant information for a trademark.
Args:
serial_or_registration: Serial or registration number
Returns:
List of owner entries
"""
data = self.get_trademark_by_serial(serial_or_registration)
if not data:
data = self.get_trademark_by_registration(serial_or_registration)
if not data:
return []
tm = data.get('TradeMarkAppln', {})
return tm.get('Owners', [])
def get_prosecution_history(self, serial_or_registration: str) -> List[Dict]:
"""
Get prosecution history for a trademark.
Args:
serial_or_registration: Serial or registration number
Returns:
List of prosecution events
"""
data = self.get_trademark_by_serial(serial_or_registration)
if not data:
data = self.get_trademark_by_registration(serial_or_registration)
if not data:
return []
tm = data.get('TradeMarkAppln', {})
return tm.get('ProsecutionHistoryEntry', [])
def check_trademark_health(self, serial_or_registration: str) -> Dict[str, Any]:
    """
    Check trademark health and identify issues.

    Fetches the status summary through get_trademark_status and classifies
    the upper-cased status text by keyword. Keywords are tested in a fixed
    order (ABANDON, CANCELLED, EXPIRED, SUSPENDED, PUBLISHED, REGISTERED,
    PENDING) and only the first match produces an alert.

    Args:
        serial_or_registration: Serial or registration number

    Returns:
        {'error': 'Trademark not found'} when no status summary is
        available. Otherwise a dictionary with:
            'mark': the mark text from the summary
            'status': the upper-cased status text ('' when the summary
                has no status text)
            'status_date': the status date from the summary
            'alerts': a list holding zero or one alert message
            'needs_attention': True when the mark is abandoned, cancelled
                or expired
    """
    status = self.get_trademark_status(serial_or_registration)
    if not status:
        return {'error': 'Trademark not found'}

    # The summary may carry the 'status' key with a None value; a .get()
    # default does not replace an existing None, so coalesce explicitly
    # before calling upper().
    current_status = (status.get('status') or '').upper()

    # (keyword, alert message, needs attention). Order matters: the first
    # keyword found in the status text wins.
    status_rules = (
        ('ABANDON', '⚠️ ABANDONED - Mark is no longer active', True),
        ('CANCELLED', '⚠️ CANCELLED - Registration cancelled', True),
        ('EXPIRED', '⚠️ EXPIRED - Registration has expired', True),
        ('SUSPENDED', '⏸️ SUSPENDED - Examination suspended', False),
        ('PUBLISHED', '📢 PUBLISHED - In opposition period', False),
        ('REGISTERED', '✅ ACTIVE - Mark is registered and active', False),
        ('PENDING', '⏳ PENDING - Application under examination', False),
    )

    alerts = []
    needs_attention = False
    for keyword, message, is_problem in status_rules:
        if keyword in current_status:
            alerts.append(message)
            needs_attention = is_problem
            break

    return {
        'mark': status.get('mark_text'),
        'status': current_status,
        'status_date': status.get('status_date'),
        'alerts': alerts,
        'needs_attention': needs_attention,
    }
def main():
    """
    Command-line interface for trademark search.

    Usage:
        python trademark_client.py <serial_or_registration_number>
        python trademark_client.py --status <number>
        python trademark_client.py --health <number>
        python trademark_client.py --goods <number>

    Prints the lookup result as indented JSON on stdout. Exits with
    status 1 when the arguments are incomplete, when the lookup returns
    nothing, or when the lookup raises.
    """
    args = sys.argv[1:]
    option_flags = ("--status", "--health", "--goods")
    has_flag = bool(args) and args[0] in option_flags

    # An option without its number is as unusable as no arguments at all:
    # show the usage text instead of failing later on a missing argv entry.
    if not args or (has_flag and len(args) < 2):
        print("Usage:")
        print("  python trademark_client.py <serial_or_registration_number>")
        print("  python trademark_client.py --status <number>")
        print("  python trademark_client.py --health <number>")
        print("  python trademark_client.py --goods <number>")
        sys.exit(1)

    client = TrademarkClient()
    number = args[1] if has_flag else args[0]

    try:
        if args[0] == "--status":
            result = client.get_trademark_status(number)
        elif args[0] == "--health":
            result = client.check_trademark_health(number)
        elif args[0] == "--goods":
            result = client.get_goods_and_services(number)
        else:
            # Get full trademark data, trying serial before registration
            result = (
                client.get_trademark_by_serial(number)
                or client.get_trademark_by_registration(number)
            )

        if result:
            print(json.dumps(result, indent=2))
        else:
            # Report the number that was looked up, not the option flag.
            print(f"Trademark {number} not found", file=sys.stderr)
            sys.exit(1)
    except Exception as e:
        # SystemExit is not an Exception subclass, so the exit above
        # passes through this handler untouched.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a
# script; importing the module does not call main().
if __name__ == "__main__":
    main()