mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-01-26 16:58:56 +08:00
Add USPTO and OpenTargets databases
This commit is contained in:
367
scientific-databases/opentargets-database/SKILL.md
Normal file
367
scientific-databases/opentargets-database/SKILL.md
Normal file
@@ -0,0 +1,367 @@
|
||||
---
|
||||
name: opentargets-database
|
||||
description: Access and query the Open Targets Platform, a comprehensive resource for therapeutic target identification and validation. Use this skill when working with drug target discovery, investigating target-disease associations, evaluating target tractability and safety, retrieving evidence from genetics/omics/literature supporting target-disease links, finding known drugs for diseases, assessing druggability of genes, or analyzing gene/disease/drug relationships for drug discovery and development.
|
||||
---
|
||||
|
||||
# Open Targets Database
|
||||
|
||||
## Overview
|
||||
|
||||
The Open Targets Platform is a comprehensive resource that supports systematic identification and prioritization of potential therapeutic drug targets. It integrates publicly available datasets including human genetics, omics, literature, and chemical data to build and score target-disease associations.
|
||||
|
||||
**Key capabilities:**
|
||||
- Query target (gene) annotations including tractability, safety, expression
|
||||
- Search for disease-target associations with evidence scores
|
||||
- Retrieve evidence from multiple data types (genetics, pathways, literature, etc.)
|
||||
- Find known drugs for diseases and their mechanisms
|
||||
- Access drug information including clinical trial phases and adverse events
|
||||
- Evaluate target druggability and therapeutic potential
|
||||
|
||||
**Data access:** The platform provides a GraphQL API, web interface, data downloads, and Google BigQuery access. This skill focuses on the GraphQL API for programmatic access.
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
Use this skill when:
|
||||
|
||||
- **Target discovery:** Finding potential therapeutic targets for a disease
|
||||
- **Target assessment:** Evaluating tractability, safety, and druggability of genes
|
||||
- **Evidence gathering:** Retrieving supporting evidence for target-disease associations
|
||||
- **Drug repurposing:** Identifying existing drugs that could be repurposed for new indications
|
||||
- **Competitive intelligence:** Understanding clinical precedence and drug development landscape
|
||||
- **Target prioritization:** Ranking targets based on genetic evidence and other data types
|
||||
- **Mechanism research:** Investigating biological pathways and gene functions
|
||||
- **Biomarker discovery:** Finding genes differentially expressed in disease
|
||||
- **Safety assessment:** Identifying potential toxicity concerns for drug targets
|
||||
|
||||
## Core Workflow
|
||||
|
||||
### 1. Search for Entities
|
||||
|
||||
Start by finding the identifiers for targets, diseases, or drugs of interest.
|
||||
|
||||
**For targets (genes):**
|
||||
```python
|
||||
from scripts.query_opentargets import search_entities
|
||||
|
||||
# Search by gene symbol or name
|
||||
results = search_entities("BRCA1", entity_types=["target"])
|
||||
# Returns: [{"id": "ENSG00000012048", "name": "BRCA1", ...}]
|
||||
```
|
||||
|
||||
**For diseases:**
|
||||
```python
|
||||
# Search by disease name
|
||||
results = search_entities("alzheimer", entity_types=["disease"])
|
||||
# Returns: [{"id": "EFO_0000249", "name": "Alzheimer disease", ...}]
|
||||
```
|
||||
|
||||
**For drugs:**
|
||||
```python
|
||||
# Search by drug name
|
||||
results = search_entities("aspirin", entity_types=["drug"])
|
||||
# Returns: [{"id": "CHEMBL25", "name": "ASPIRIN", ...}]
|
||||
```
|
||||
|
||||
**Identifiers used:**
|
||||
- Targets: Ensembl gene IDs (e.g., `ENSG00000157764`)
|
||||
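- Diseases: EFO (Experimental Factor Ontology) IDs (e.g., `EFO_0000249`); some diseases are identified by IDs imported from other ontologies (e.g., `MONDO_...`, `Orphanet_...`), so always use the ID returned by the search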
|
||||
- Drugs: ChEMBL IDs (e.g., `CHEMBL25`)
|
||||
|
||||
### 2. Query Target Information
|
||||
|
||||
Retrieve comprehensive target annotations to assess druggability and biology.
|
||||
|
||||
```python
|
||||
from scripts.query_opentargets import get_target_info
|
||||
|
||||
target_info = get_target_info("ENSG00000157764", include_diseases=True)
|
||||
|
||||
# Access key fields:
|
||||
# - approvedSymbol: HGNC gene symbol
|
||||
# - approvedName: Full gene name
|
||||
# - tractability: Druggability assessments across modalities
|
||||
# - safetyLiabilities: Known safety concerns
|
||||
# - geneticConstraint: Constraint scores from gnomAD
|
||||
# - associatedDiseases: Top disease associations with scores
|
||||
```
|
||||
|
||||
**Key annotations to review:**
|
||||
- **Tractability:** Small molecule, antibody, PROTAC druggability predictions
|
||||
- **Safety:** Known toxicity concerns from multiple databases
|
||||
- **Genetic constraint:** pLI and LOEUF scores indicating intolerance to loss-of-function variation (a proxy for gene essentiality)
|
||||
- **Disease associations:** Diseases linked to the target with evidence scores
|
||||
|
||||
Refer to `references/target_annotations.md` for detailed information about all target features.
|
||||
|
||||
### 3. Query Disease Information
|
||||
|
||||
Get disease details and associated targets/drugs.
|
||||
|
||||
```python
|
||||
from scripts.query_opentargets import get_disease_info
|
||||
|
||||
disease_info = get_disease_info("EFO_0000249", include_targets=True)
|
||||
|
||||
# Access fields:
|
||||
# - name: Disease name
|
||||
# - description: Disease description
|
||||
# - therapeuticAreas: High-level disease categories
|
||||
# - associatedTargets: Top targets with association scores
|
||||
```
|
||||
|
||||
### 4. Retrieve Target-Disease Evidence
|
||||
|
||||
Get detailed evidence supporting a target-disease association.
|
||||
|
||||
```python
|
||||
from scripts.query_opentargets import get_target_disease_evidence
|
||||
|
||||
# Get all evidence
|
||||
evidence = get_target_disease_evidence(
|
||||
ensembl_id="ENSG00000157764",
|
||||
efo_id="EFO_0000249"
|
||||
)
|
||||
|
||||
# Filter by evidence type
|
||||
genetic_evidence = get_target_disease_evidence(
|
||||
ensembl_id="ENSG00000157764",
|
||||
efo_id="EFO_0000249",
|
||||
data_types=["genetic_association"]
|
||||
)
|
||||
|
||||
# Each evidence record contains:
|
||||
# - datasourceId: Specific data source (e.g., "gwas_catalog", "chembl")
|
||||
# - datatypeId: Evidence category (e.g., "genetic_association", "known_drug")
|
||||
# - score: Evidence strength (0-1)
|
||||
# - studyId: Original study identifier
|
||||
# - literature: Associated publications
|
||||
```
|
||||
|
||||
**Major evidence types:**
|
||||
1. **genetic_association:** GWAS, rare variants, ClinVar, gene burden
|
||||
2. **somatic_mutation:** Cancer Gene Census, IntOGen, cancer biomarkers
|
||||
3. **known_drug:** Clinical precedence from approved/clinical drugs
|
||||
4. **affected_pathway:** CRISPR screens, pathway analyses, gene signatures
|
||||
5. **rna_expression:** Differential expression from Expression Atlas
|
||||
6. **animal_model:** Mouse phenotypes from IMPC
|
||||
7. **literature:** Text-mining from Europe PMC
|
||||
|
||||
Refer to `references/evidence_types.md` for detailed descriptions of all evidence types and interpretation guidelines.
|
||||
|
||||
### 5. Find Known Drugs
|
||||
|
||||
Identify drugs used for a disease and their targets.
|
||||
|
||||
```python
|
||||
from scripts.query_opentargets import get_known_drugs_for_disease
|
||||
|
||||
drugs = get_known_drugs_for_disease("EFO_0000249")
|
||||
|
||||
# drugs contains:
|
||||
# - uniqueDrugs: Total number of unique drugs
|
||||
# - uniqueTargets: Total number of unique targets
|
||||
# - rows: List of drug-target-indication records with:
|
||||
# - drug: {name, drugType, maximumClinicalTrialPhase}
|
||||
# - targets: Genes targeted by the drug
|
||||
# - phase: Clinical trial phase for this indication
|
||||
# - status: Trial status (active, completed, etc.)
|
||||
# - mechanismOfAction: How drug works
|
||||
```
|
||||
|
||||
**Clinical phases:**
|
||||
- Phase 4: Approved drug
|
||||
- Phase 3: Late-stage clinical trials
|
||||
- Phase 2: Mid-stage trials
|
||||
- Phase 1: Early safety trials
|
||||
|
||||
### 6. Get Drug Information
|
||||
|
||||
Retrieve detailed drug information including mechanisms and indications.
|
||||
|
||||
```python
|
||||
from scripts.query_opentargets import get_drug_info
|
||||
|
||||
drug_info = get_drug_info("CHEMBL25")
|
||||
|
||||
# Access:
|
||||
# - name, synonyms: Drug identifiers
|
||||
# - drugType: Small molecule, antibody, etc.
|
||||
# - maximumClinicalTrialPhase: Development stage
|
||||
# - mechanismsOfAction: Target and action type
|
||||
# - indications: Diseases with trial phases
|
||||
# - withdrawnNotice: If withdrawn, reasons and countries
|
||||
```
|
||||
|
||||
### 7. Get All Associations for a Target
|
||||
|
||||
Find all diseases associated with a target, optionally filtering by score.
|
||||
|
||||
```python
|
||||
from scripts.query_opentargets import get_target_associations
|
||||
|
||||
# Get associations with score >= 0.5
|
||||
associations = get_target_associations(
|
||||
ensembl_id="ENSG00000157764",
|
||||
min_score=0.5
|
||||
)
|
||||
|
||||
# Each association contains:
|
||||
# - disease: {id, name}
|
||||
# - score: Overall association score (0-1)
|
||||
# - datatypeScores: Breakdown by evidence type
|
||||
```
|
||||
|
||||
**Association scores:**
|
||||
- Range: 0-1 (higher = stronger evidence)
|
||||
- Aggregates evidence across all data types using a harmonic sum
|
||||
- NOT confidence scores but relative ranking metrics
|
||||
- Under-studied diseases may have lower scores despite good evidence
|
||||
|
||||
## GraphQL API Details
|
||||
|
||||
**For custom queries beyond the provided helper functions**, use the GraphQL API directly or modify `scripts/query_opentargets.py`.
|
||||
|
||||
Key information:
|
||||
- **Endpoint:** `https://api.platform.opentargets.org/api/v4/graphql`
|
||||
- **Interactive browser:** `https://api.platform.opentargets.org/api/v4/graphql/browser`
|
||||
- **No authentication required**
|
||||
- **Request only needed fields** to minimize response size
|
||||
- **Use pagination** for large result sets: `page: {size: N, index: M}`
|
||||
|
||||
Refer to `references/api_reference.md` for:
|
||||
- Complete endpoint documentation
|
||||
- Example queries for all entity types
|
||||
- Error handling patterns
|
||||
- Best practices for API usage
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Target Prioritization Strategy
|
||||
|
||||
When prioritizing drug targets:
|
||||
|
||||
1. **Start with genetic evidence:** Human genetics (GWAS, rare variants) provides strongest disease relevance
|
||||
2. **Check tractability:** Prefer targets with clinical or discovery precedence
|
||||
3. **Assess safety:** Review safety liabilities, expression patterns, and genetic constraint
|
||||
4. **Evaluate clinical precedence:** Known drugs indicate druggability and therapeutic window
|
||||
5. **Consider multiple evidence types:** Convergent evidence from different sources increases confidence
|
||||
6. **Validate mechanistically:** Pathway evidence and biological plausibility
|
||||
7. **Review literature manually:** For critical decisions, examine primary publications
|
||||
|
||||
### Evidence Interpretation
|
||||
|
||||
**Strong evidence indicators:**
|
||||
- Multiple independent evidence sources
|
||||
- High genetic association scores (especially GWAS with L2G > 0.5)
|
||||
- Clinical precedence from approved drugs
|
||||
- ClinVar pathogenic variants with disease match
|
||||
- Mouse models with relevant phenotypes
|
||||
|
||||
**Caution flags:**
|
||||
- Single evidence source only
|
||||
- Text-mining as sole evidence (requires manual validation)
|
||||
- Conflicting evidence across sources
|
||||
- High essentiality + ubiquitous expression (poor therapeutic window)
|
||||
- Multiple safety liabilities
|
||||
|
||||
**Score interpretation:**
|
||||
- Scores rank relative strength, not absolute confidence
|
||||
- Under-studied diseases have lower scores despite potentially valid targets
|
||||
- Weight expert-curated sources higher than computational predictions
|
||||
- Check evidence breakdown, not just overall score
|
||||
|
||||
### Common Workflows
|
||||
|
||||
**Workflow 1: Target Discovery for a Disease**
|
||||
1. Search for disease → get EFO ID
|
||||
2. Query disease info with `include_targets=True`
|
||||
3. Review top targets sorted by association score
|
||||
4. For promising targets, get detailed target info
|
||||
5. Examine evidence types supporting each association
|
||||
6. Assess tractability and safety for prioritized targets
|
||||
|
||||
**Workflow 2: Target Validation**
|
||||
1. Search for target → get Ensembl ID
|
||||
2. Get comprehensive target info
|
||||
3. Check tractability (especially clinical precedence)
|
||||
4. Review safety liabilities and genetic constraint
|
||||
5. Examine disease associations to understand biology
|
||||
6. Look for chemical probes or tool compounds
|
||||
7. Check known drugs targeting gene for mechanism insights
|
||||
|
||||
**Workflow 3: Drug Repurposing**
|
||||
1. Search for disease → get EFO ID
|
||||
2. Get known drugs for disease
|
||||
3. For each drug, get detailed drug info
|
||||
4. Examine mechanisms of action and targets
|
||||
5. Look for related disease indications
|
||||
6. Assess clinical trial phases and status
|
||||
7. Identify repurposing opportunities based on mechanism
|
||||
|
||||
**Workflow 4: Competitive Intelligence**
|
||||
1. Search for target of interest
|
||||
2. Get associated diseases with evidence
|
||||
3. For each disease, get known drugs
|
||||
4. Review clinical phases and development status
|
||||
5. Identify competitors and their mechanisms
|
||||
6. Assess clinical precedence and market landscape
|
||||
|
||||
## Resources
|
||||
|
||||
### Scripts
|
||||
|
||||
**scripts/query_opentargets.py**
|
||||
Helper functions for common API operations:
|
||||
- `search_entities()` - Search for targets, diseases, or drugs
|
||||
- `get_target_info()` - Retrieve target annotations
|
||||
- `get_disease_info()` - Retrieve disease information
|
||||
- `get_target_disease_evidence()` - Get supporting evidence
|
||||
- `get_known_drugs_for_disease()` - Find drugs for a disease
|
||||
- `get_drug_info()` - Retrieve drug details
|
||||
- `get_target_associations()` - Get all associations for a target
|
||||
- `execute_query()` - Execute custom GraphQL queries
|
||||
|
||||
### References
|
||||
|
||||
**references/api_reference.md**
|
||||
Complete GraphQL API documentation including:
|
||||
- Endpoint details and authentication
|
||||
- Available query types (target, disease, drug, search)
|
||||
- Example queries for all common operations
|
||||
- Error handling and best practices
|
||||
- Data licensing and citation requirements
|
||||
|
||||
**references/evidence_types.md**
|
||||
Comprehensive guide to evidence types and data sources:
|
||||
- Detailed descriptions of all 7 major evidence types
|
||||
- Scoring methodologies for each source
|
||||
- Evidence interpretation guidelines
|
||||
- Strengths and limitations of each evidence type
|
||||
- Quality assessment recommendations
|
||||
|
||||
**references/target_annotations.md**
|
||||
Complete target annotation reference:
|
||||
- 12 major annotation categories explained
|
||||
- Tractability assessment details
|
||||
- Safety liability sources
|
||||
- Expression, essentiality, and constraint data
|
||||
- Interpretation guidelines for target prioritization
|
||||
- Red flags and green flags for target assessment
|
||||
|
||||
## Data Updates and Versioning
|
||||
|
||||
The Open Targets Platform is updated **quarterly** with new data releases. The API endpoint serves the current release; this document reflects the platform as of October 2025.
|
||||
|
||||
**Release information:** Check https://platform-docs.opentargets.org/release-notes for the latest updates.
|
||||
|
||||
**Citation:** When using Open Targets data, cite:
|
||||
Buniello, A. et al. (2025) Open Targets Platform: facilitating therapeutic hypotheses building in drug discovery. Nucleic Acids Research, 53(D1):D1467-D1475.
|
||||
|
||||
## Limitations and Considerations
|
||||
|
||||
1. **API is for exploratory queries:** For systematic analyses of many targets/diseases, use data downloads or BigQuery
|
||||
2. **Scores are relative, not absolute:** Association scores rank evidence strength but don't predict clinical success
|
||||
3. **Under-studied diseases score lower:** Novel or rare diseases may have strong evidence but lower aggregate scores
|
||||
4. **Evidence quality varies:** Weight expert-curated sources higher than computational predictions
|
||||
5. **Requires biological interpretation:** Scores and evidence must be interpreted in biological and clinical context
|
||||
6. **No authentication required:** All data is freely accessible, but cite appropriately
|
||||
@@ -0,0 +1,249 @@
|
||||
# Open Targets Platform API Reference
|
||||
|
||||
## API Endpoint
|
||||
|
||||
```
|
||||
https://api.platform.opentargets.org/api/v4/graphql
|
||||
```
|
||||
|
||||
Interactive GraphQL playground with documentation:
|
||||
```
|
||||
https://api.platform.opentargets.org/api/v4/graphql/browser
|
||||
```
|
||||
|
||||
## Access Methods
|
||||
|
||||
The Open Targets Platform provides multiple access methods:
|
||||
|
||||
1. **GraphQL API** - Best for single entity queries and flexible data retrieval
|
||||
2. **Web Interface** - Interactive platform at https://platform.opentargets.org
|
||||
3. **Data Downloads** - FTP at https://ftp.ebi.ac.uk/pub/databases/opentargets/platform/
|
||||
4. **Google BigQuery** - For large-scale systematic queries
|
||||
|
||||
## Authentication
|
||||
|
||||
No authentication is required for the GraphQL API. All data is freely accessible.
|
||||
|
||||
## Rate Limits
|
||||
|
||||
For systematic queries involving multiple targets or diseases, use dataset downloads or BigQuery instead of repeated API calls. The API is optimized for single-entity and exploratory queries.
|
||||
|
||||
## GraphQL Query Structure
|
||||
|
||||
GraphQL queries consist of:
|
||||
1. Query operation with optional variables
|
||||
2. Field selection (request only needed fields)
|
||||
3. Nested entity traversal
|
||||
|
||||
### Basic Python Example
|
||||
|
||||
```python
|
||||
import requests
|
||||
import json
|
||||
|
||||
# Define the query
|
||||
query_string = """
|
||||
query target($ensemblId: String!){
|
||||
target(ensemblId: $ensemblId){
|
||||
id
|
||||
approvedSymbol
|
||||
biotype
|
||||
geneticConstraint {
|
||||
constraintType
|
||||
exp
|
||||
obs
|
||||
score
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
# Define variables
|
||||
variables = {"ensemblId": "ENSG00000169083"}
|
||||
|
||||
# Make the request
|
||||
base_url = "https://api.platform.opentargets.org/api/v4/graphql"
|
||||
response = requests.post(base_url, json={"query": query_string, "variables": variables})
|
||||
data = json.loads(response.text)
|
||||
print(data)
|
||||
```
|
||||
|
||||
## Available Query Endpoints
|
||||
|
||||
### /target
|
||||
Retrieve gene annotations, tractability assessments, and disease associations.
|
||||
|
||||
**Common fields:**
|
||||
- `id` - Ensembl gene ID
|
||||
- `approvedSymbol` - HGNC gene symbol
|
||||
- `approvedName` - Full gene name
|
||||
- `biotype` - Gene type (protein_coding, etc.)
|
||||
- `tractability` - Druggability assessment
|
||||
- `safetyLiabilities` - Safety information
|
||||
- `expressions` - Baseline expression data
|
||||
- `knownDrugs` - Approved/clinical drugs
|
||||
- `associatedDiseases` - Disease associations with evidence
|
||||
|
||||
### /disease
|
||||
Retrieve disease/phenotype data, known drugs, and clinical information.
|
||||
|
||||
**Common fields:**
|
||||
- `id` - EFO disease identifier
|
||||
- `name` - Disease name
|
||||
- `description` - Disease description
|
||||
- `therapeuticAreas` - High-level disease categories
|
||||
- `synonyms` - Alternative names
|
||||
- `knownDrugs` - Drugs indicated for disease
|
||||
- `associatedTargets` - Target associations with evidence
|
||||
|
||||
### /drug
|
||||
Retrieve compound details, mechanisms of action, and pharmacovigilance data.
|
||||
|
||||
**Common fields:**
|
||||
- `id` - ChEMBL identifier
|
||||
- `name` - Drug name
|
||||
- `drugType` - Small molecule, antibody, etc.
|
||||
- `maximumClinicalTrialPhase` - Development stage
|
||||
- `indications` - Disease indications
|
||||
- `mechanismsOfAction` - Target mechanisms
|
||||
- `adverseEvents` - Pharmacovigilance data
|
||||
|
||||
### /search
|
||||
Search across all entities (targets, diseases, drugs).
|
||||
|
||||
**Parameters:**
|
||||
- `queryString` - Search term
|
||||
- `entityNames` - Filter by entity type(s)
|
||||
- `page` - Pagination
|
||||
|
||||
### /associationDiseaseIndirect
|
||||
Retrieve target-disease associations including indirect evidence from disease descendants in ontology.
|
||||
|
||||
**Key fields:**
|
||||
- `rows` - Association records with scores
|
||||
- `aggregations` - Aggregated statistics
|
||||
|
||||
## Example Queries
|
||||
|
||||
### Query 1: Get target information with disease associations
|
||||
|
||||
```python
|
||||
query = """
|
||||
query targetInfo($ensemblId: String!) {
|
||||
target(ensemblId: $ensemblId) {
|
||||
approvedSymbol
|
||||
approvedName
|
||||
tractability {
|
||||
label
|
||||
modality
|
||||
value
|
||||
}
|
||||
associatedDiseases(page: {size: 10}) {
|
||||
rows {
|
||||
disease {
|
||||
name
|
||||
}
|
||||
score
|
||||
datatypeScores {
|
||||
componentId
|
||||
score
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
variables = {"ensemblId": "ENSG00000157764"}
|
||||
```
|
||||
|
||||
### Query 2: Search for diseases
|
||||
|
||||
```python
|
||||
query = """
|
||||
query searchDiseases($queryString: String!) {
|
||||
search(queryString: $queryString, entityNames: ["disease"]) {
|
||||
hits {
|
||||
id
|
||||
entity
|
||||
name
|
||||
description
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
variables = {"queryString": "alzheimer"}
|
||||
```
|
||||
|
||||
### Query 3: Get evidence for target-disease pair
|
||||
|
||||
```python
|
||||
query = """
|
||||
query evidences($ensemblId: String!, $efoId: String!) {
|
||||
disease(efoId: $efoId) {
|
||||
evidences(ensemblIds: [$ensemblId], size: 100) {
|
||||
rows {
|
||||
datasourceId
|
||||
datatypeId
|
||||
score
|
||||
studyId
|
||||
literature
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
variables = {"ensemblId": "ENSG00000157764", "efoId": "EFO_0000249"}
|
||||
```
|
||||
|
||||
### Query 4: Get known drugs for a disease
|
||||
|
||||
```python
|
||||
query = """
|
||||
query knownDrugs($efoId: String!) {
|
||||
disease(efoId: $efoId) {
|
||||
knownDrugs {
|
||||
uniqueDrugs
|
||||
rows {
|
||||
drug {
|
||||
name
|
||||
id
|
||||
}
|
||||
targets {
|
||||
approvedSymbol
|
||||
}
|
||||
phase
|
||||
status
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
variables = {"efoId": "EFO_0000249"}
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
GraphQL servers typically return HTTP status code 200 even when a query fails, so do not rely on the status code alone. Check the response body for an `errors` key:
|
||||
|
||||
```python
|
||||
if 'errors' in response_data:
|
||||
print(f"GraphQL errors: {response_data['errors']}")
|
||||
else:
|
||||
print(f"Data: {response_data['data']}")
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Request only needed fields** - Minimize data transfer and improve response time
|
||||
2. **Use variables** - Make queries reusable and safer
|
||||
3. **Handle pagination** - Most list fields support pagination with `page: {size: N, index: M}`
|
||||
4. **Explore the schema** - Use the GraphQL browser to discover available fields
|
||||
5. **Batch related queries** - Combine multiple entity fetches in a single query when possible
|
||||
6. **Cache results** - Store frequently accessed data locally to reduce API calls
|
||||
7. **Use BigQuery for bulk** - Switch to BigQuery/downloads for systematic analyses
|
||||
|
||||
## Data Licensing
|
||||
|
||||
All Open Targets Platform data is freely available. When using the data in research or commercial products, cite the latest publication:
|
||||
|
||||
Buniello, A. et al. (2025) Open Targets Platform: facilitating therapeutic hypotheses building in drug discovery. Nucleic Acids Research, 53(D1):D1467-D1475.
|
||||
@@ -0,0 +1,306 @@
|
||||
# Evidence Types and Data Sources
|
||||
|
||||
## Overview
|
||||
|
||||
Evidence represents any event or set of events that identifies a target as a potential causal gene or protein for a disease. Evidence is standardized and mapped to:
|
||||
- **Ensembl gene IDs** for targets
|
||||
- **EFO (Experimental Factor Ontology)** for diseases/phenotypes
|
||||
|
||||
Evidence is organized into **data types** (broader categories) and **data sources** (specific databases/studies).
|
||||
|
||||
## Evidence Data Types
|
||||
|
||||
### 1. Genetic Association
|
||||
|
||||
Evidence from human genetics linking genetic variants to disease phenotypes.
|
||||
|
||||
#### Data Sources:
|
||||
|
||||
**GWAS (Genome-Wide Association Studies)**
|
||||
- Population-level common variant associations
|
||||
- Filtered with Locus-to-Gene (L2G) scores >0.05
|
||||
- Includes fine-mapping and colocalization data
|
||||
- Sources: GWAS Catalog, FinnGen, UK Biobank, EBI GWAS
|
||||
|
||||
**Gene Burden Tests**
|
||||
- Rare variant association analyses
|
||||
- Aggregate effects of multiple rare variants in a gene
|
||||
- Particularly relevant for Mendelian and rare diseases
|
||||
|
||||
**ClinVar Germline**
|
||||
- Clinical variant interpretations
|
||||
- Classifications: pathogenic, likely pathogenic, VUS, benign
|
||||
- Expert-reviewed variant-disease associations
|
||||
|
||||
**Genomics England PanelApp**
|
||||
- Expert gene-disease ratings
|
||||
- Green (high level of evidence), amber (moderate evidence), red (low or insufficient evidence)
|
||||
- Focus on rare diseases and cancer
|
||||
|
||||
**Gene2Phenotype**
|
||||
- Curated gene-disease relationships
|
||||
- Allelic requirements and inheritance patterns
|
||||
- Clinical validity assessments
|
||||
|
||||
**UniProt Literature & Variants**
|
||||
- Literature-based gene-disease associations
|
||||
- Expert-curated from scientific publications
|
||||
|
||||
**Orphanet**
|
||||
- Rare disease gene associations
|
||||
- Expert-reviewed and maintained
|
||||
|
||||
**ClinGen**
|
||||
- Clinical genome resource classifications
|
||||
- Gene-disease validity assertions
|
||||
|
||||
### 2. Somatic Mutations
|
||||
|
||||
Evidence from cancer genomics identifying driver genes and therapeutic targets.
|
||||
|
||||
#### Data Sources:
|
||||
|
||||
**Cancer Gene Census**
|
||||
- Expert-curated cancer genes
|
||||
- Tier classifications (1 = strong evidence, 2 = emerging)
|
||||
- Mutation types and cancer types
|
||||
|
||||
**IntOGen**
|
||||
- Computational driver gene predictions
|
||||
- Aggregated from large cohort studies
|
||||
- Statistical significance of mutations
|
||||
|
||||
**ClinVar Somatic**
|
||||
- Somatic clinical variant interpretations
|
||||
- Oncogenic/likely oncogenic classifications
|
||||
|
||||
**Cancer Biomarkers**
|
||||
- FDA/EMA approved biomarkers
|
||||
- Clinical trial biomarkers
|
||||
- Prognostic and predictive markers
|
||||
|
||||
### 3. Known Drugs
|
||||
|
||||
Evidence from clinical precedence showing drugs targeting genes for disease indications.
|
||||
|
||||
#### Data Source:
|
||||
|
||||
**ChEMBL**
|
||||
- Approved drugs (Phase 4)
|
||||
- Clinical candidates (Phase 1-3)
|
||||
- Withdrawn drugs
|
||||
- Drug-target-indication triplets with mechanism of action
|
||||
|
||||
**Clinical Trial Information:**
|
||||
- `phase`: Maximum clinical trial phase (1, 2, 3, 4)
|
||||
- `status`: Active, terminated, completed, withdrawn
|
||||
- `mechanismOfAction`: How drug affects target
|
||||
|
||||
### 4. Affected Pathways
|
||||
|
||||
Evidence linking genes to disease through pathway perturbations and functional screens.
|
||||
|
||||
#### Data Sources:
|
||||
|
||||
**CRISPR Screens**
|
||||
- Genome-scale knockout screens
|
||||
- Cancer dependency and essentiality data
|
||||
|
||||
**Project Score (Cancer Dependency Map)**
|
||||
- CRISPR-Cas9 fitness screens across cancer cell lines
|
||||
- Gene essentiality profiles
|
||||
|
||||
**SLAPenrich**
|
||||
- Pathway enrichment analysis
|
||||
- Somatic mutation pathway impacts
|
||||
|
||||
**PROGENy**
|
||||
- Pathway activity inference
|
||||
- Signaling pathway perturbations
|
||||
|
||||
**Reactome**
|
||||
- Expert-curated pathway annotations
|
||||
- Biological pathway representations
|
||||
|
||||
**Gene Signatures**
|
||||
- Expression-based signatures
|
||||
- Pathway activity patterns
|
||||
|
||||
### 5. RNA Expression
|
||||
|
||||
Evidence from differential gene expression in disease vs. control tissues.
|
||||
|
||||
#### Data Source:
|
||||
|
||||
**Expression Atlas**
|
||||
- Differential expression data
|
||||
- Baseline expression across tissues/conditions
|
||||
- RNA-Seq and microarray studies
|
||||
- Log2 fold-change and p-values
|
||||
|
||||
### 6. Animal Models
|
||||
|
||||
Evidence from in vivo studies showing phenotypes associated with gene perturbations.
|
||||
|
||||
#### Data Source:
|
||||
|
||||
**IMPC (International Mouse Phenotyping Consortium)**
|
||||
- Systematic mouse knockout phenotypes
|
||||
- Phenotype-disease mappings via ontologies
|
||||
- Standardized phenotyping procedures
|
||||
|
||||
### 7. Literature
|
||||
|
||||
Evidence from text-mining of biomedical literature.
|
||||
|
||||
#### Data Source:
|
||||
|
||||
**Europe PMC**
|
||||
- Co-occurrence of genes and diseases in abstracts
|
||||
- Normalized citation counts
|
||||
- Weighted by publication type and recency
|
||||
|
||||
## Evidence Scoring
|
||||
|
||||
Each evidence source has its own scoring methodology:
|
||||
|
||||
### Score Ranges
|
||||
- Most scores normalized to 0-1 range
|
||||
- Higher scores indicate stronger evidence
|
||||
- Scores are NOT confidence levels but relative strength indicators
|
||||
|
||||
### Common Scoring Approaches:
|
||||
|
||||
**Categorical Classifications:**
|
||||
- ClinVar: Pathogenic (1.0), Likely pathogenic (0.99), etc.
|
||||
- Gene2Phenotype: Confirmed/probable ratings
|
||||
- PanelApp: Green/amber/red classifications
|
||||
|
||||
**Statistical Measures:**
|
||||
- GWAS: L2G scores incorporating multiple lines of evidence
|
||||
- Gene Burden: Statistical significance of variant aggregation
|
||||
- Expression: Adjusted p-values and fold-changes
|
||||
|
||||
**Clinical Precedence:**
|
||||
- Known Drugs: Phase weights (Phase 4 = 1.0, Phase 3 = 0.7, Phase 2 = 0.2, Phase 1 = 0.1)
|
||||
- Clinical status modifiers
|
||||
|
||||
**Computational Predictions:**
|
||||
- IntOGen: Q-values from driver mutation analysis
|
||||
- PROGENy/SLAPenrich: Pathway activity/enrichment scores
|
||||
|
||||
## Evidence Interpretation Guidelines
|
||||
|
||||
### Strengths by Data Type
|
||||
|
||||
**Genetic Association** - Strongest human genetic evidence
|
||||
- Direct link between genetic variation and disease
|
||||
- Mendelian diseases: high confidence
|
||||
- GWAS: requires L2G to identify causal gene
|
||||
- Consider ancestry and population-specific effects
|
||||
|
||||
**Somatic Mutations** - Direct evidence in cancer
|
||||
- Strong for oncology indications
|
||||
- Driver mutations indicate therapeutic potential
|
||||
- Consider cancer type specificity
|
||||
|
||||
**Known Drugs** - Clinical validation
|
||||
- Highest confidence: approved drugs (Phase 4)
|
||||
- Consider mechanism relevance to new indication
|
||||
- Phase 1-2: early evidence, higher risk
|
||||
|
||||
**Affected Pathways** - Mechanistic insights
|
||||
- Supports biological plausibility
|
||||
- May not predict clinical success
|
||||
- Useful for hypothesis generation
|
||||
|
||||
**RNA Expression** - Observational evidence
|
||||
- Correlation, not causation
|
||||
- May reflect disease consequence vs. cause
|
||||
- Useful for biomarker identification
|
||||
|
||||
**Animal Models** - Translational evidence
|
||||
- Strong for understanding biology
|
||||
- Variable translation to human disease
|
||||
- Most useful when phenotype matches human disease
|
||||
|
||||
**Literature** - Exploratory signal
|
||||
- Text-mining captures research focus
|
||||
- May reflect publication bias
|
||||
- Requires manual literature review for validation
|
||||
|
||||
### Important Considerations
|
||||
|
||||
1. **Multiple evidence types strengthen confidence** - Convergent evidence from different data types provides stronger support
|
||||
|
||||
2. **Under-studied diseases score lower** - Novel or rare diseases may have strong evidence but lower aggregate scores due to limited research
|
||||
|
||||
3. **Association scores are not probabilities** - Scores rank relative evidence strength, not success probability
|
||||
|
||||
4. **Context matters** - Evidence strength depends on:
|
||||
- Disease mechanism understanding
|
||||
- Target biology and druggability
|
||||
- Clinical precedence in related indications
|
||||
- Safety considerations
|
||||
|
||||
5. **Data source reliability varies** - Weight expert-curated sources (ClinGen, Gene2Phenotype) higher than computational predictions
|
||||
|
||||
## Using Evidence in Queries
|
||||
|
||||
### Filtering by Data Type
|
||||
|
||||
```python
|
||||
query = """
|
||||
query evidenceByType($ensemblId: String!, $efoId: String!, $dataTypes: [String!]) {
|
||||
disease(efoId: $efoId) {
|
||||
evidences(ensemblIds: [$ensemblId], datatypes: $dataTypes) {
|
||||
rows {
|
||||
datasourceId
|
||||
score
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
variables = {
|
||||
"ensemblId": "ENSG00000157764",
|
||||
"efoId": "EFO_0000249",
|
||||
"dataTypes": ["genetic_association", "somatic_mutation"]
|
||||
}
|
||||
```
|
||||
|
||||
### Accessing Data Type Scores
|
||||
|
||||
Data type scores aggregate all source scores within that type:
|
||||
|
||||
```python
|
||||
query = """
|
||||
query associationScores($ensemblId: String!, $efoId: String!) {
|
||||
target(ensemblId: $ensemblId) {
|
||||
associatedDiseases(efoIds: [$efoId]) {
|
||||
rows {
|
||||
disease {
|
||||
name
|
||||
}
|
||||
score
|
||||
datatypeScores {
|
||||
componentId
|
||||
score
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
```
|
||||
|
||||
## Evidence Quality Assessment
|
||||
|
||||
When evaluating evidence:
|
||||
|
||||
1. **Check multiple sources** - Single source may be unreliable
|
||||
2. **Prioritize human genetic evidence** - Strongest disease relevance
|
||||
3. **Consider clinical precedence** - Known drugs indicate druggability
|
||||
4. **Assess mechanistic support** - Pathway evidence supports biology
|
||||
5. **Review literature manually** - For critical decisions, read primary publications
|
||||
6. **Validate in primary databases** - Cross-reference with ClinVar, ClinGen, etc.
|
||||
@@ -0,0 +1,401 @@
|
||||
# Target Annotations and Features
|
||||
|
||||
## Overview
|
||||
|
||||
Open Targets defines a target as "any naturally-occurring molecule that can be targeted by a medicinal product." Targets are primarily protein-coding genes identified by Ensembl gene IDs, but also include RNAs and pseudogenes from canonical chromosomes.
|
||||
|
||||
## Core Target Annotations
|
||||
|
||||
### 1. Tractability Assessment
|
||||
|
||||
Tractability evaluates the druggability potential of a target across different modalities.
|
||||
|
||||
#### Modalities Assessed:
|
||||
|
||||
**Small Molecule**
|
||||
- Prediction of small molecule druggability
|
||||
- Based on structural features, chemical precedence
|
||||
- Buckets: Clinical precedence, Discovery precedence, Predicted tractable
|
||||
|
||||
**Antibody**
|
||||
- Likelihood of antibody-based therapeutic success
|
||||
- Cell surface/secreted protein location
|
||||
- Precedence categories similar to small molecules
|
||||
|
||||
**PROTAC (Protein Degradation)**
|
||||
- Assessment for targeted protein degradation
|
||||
- E3 ligase compatibility
|
||||
- Emerging modality category
|
||||
|
||||
**Other Modalities**
|
||||
- Gene therapy, RNA-based therapeutics
|
||||
- Oligonucleotide approaches
|
||||
|
||||
#### Tractability Levels:
|
||||
|
||||
1. **Clinical Precedence** - Target of approved/clinical drug with similar mechanism
|
||||
2. **Discovery Precedence** - Target of tool compounds or compounds in preclinical development
|
||||
3. **Predicted Tractable** - Computational predictions suggest druggability
|
||||
4. **Unknown** - Insufficient data to assess
|
||||
|
||||
### 2. Safety Liabilities
|
||||
|
||||
Safety information is aggregated from multiple sources to identify potential toxicity concerns.
|
||||
|
||||
#### Data Sources:
|
||||
|
||||
**ToxCast**
|
||||
- High-throughput toxicology screening data
|
||||
- In vitro assay results
|
||||
- Toxicity pathway activation
|
||||
|
||||
**AOPWiki (Adverse Outcome Pathways)**
|
||||
- Mechanistic pathways from molecular initiating event to adverse outcome
|
||||
- Systems toxicology frameworks
|
||||
|
||||
**PharmGKB**
|
||||
- Pharmacogenomic relationships
|
||||
- Genetic variants affecting drug response and toxicity
|
||||
|
||||
**Published Literature**
|
||||
- Expert-curated safety concerns from publications
|
||||
- Clinical trial adverse events
|
||||
|
||||
#### Safety Flags:
|
||||
|
||||
- **Organ toxicity** - Liver, kidney, cardiac effects
|
||||
- **Target safety liability** - Known on-target toxic effects
|
||||
- **Off-target effects** - Unintended activity concerns
|
||||
- **Clinical observations** - Adverse events from drugs targeting the gene
|
||||
|
||||
### 3. Baseline Expression
|
||||
|
||||
Gene/protein expression across tissues and cell types from multiple sources.
|
||||
|
||||
#### Data Sources:
|
||||
|
||||
**Expression Atlas**
|
||||
- RNA-Seq expression across tissues/conditions
|
||||
- Normalized expression levels (TPM, FPKM)
|
||||
- Differential expression studies
|
||||
|
||||
**GTEx (Genotype-Tissue Expression)**
|
||||
- Comprehensive tissue expression from healthy donors
|
||||
- Median TPM across 53 tissues
|
||||
- Expression variation analysis
|
||||
|
||||
**Human Protein Atlas**
|
||||
- Protein expression via immunohistochemistry
|
||||
- Subcellular localization
|
||||
- Tissue specificity classifications
|
||||
|
||||
#### Expression Metrics:
|
||||
|
||||
- **TPM (Transcripts Per Million)** - Normalized RNA abundance
|
||||
- **Tissue specificity** - Enrichment in specific tissues
|
||||
- **Protein level** - Protein abundance and its correlation with RNA expression
|
||||
- **Subcellular location** - Where protein is found in cell
|
||||
|
||||
### 4. Molecular Interactions
|
||||
|
||||
Protein-protein interactions, complex memberships, and molecular partnerships.
|
||||
|
||||
#### Interaction Types:
|
||||
|
||||
**Physical Interactions**
|
||||
- Direct protein-protein binding
|
||||
- Complex components
|
||||
- Sources: IntAct, BioGRID, STRING
|
||||
|
||||
**Pathway Membership**
|
||||
- Biological pathways from Reactome
|
||||
- Functional relationships
|
||||
- Upstream/downstream regulators
|
||||
|
||||
**Target Interactors**
|
||||
- Direct interactors relevant to disease associations
|
||||
- Context-specific interactions
|
||||
|
||||
### 5. Gene Essentiality
|
||||
|
||||
Dependency data indicating whether a gene is essential for cell survival.
|
||||
|
||||
#### Data Sources:
|
||||
|
||||
**Project Score**
|
||||
- CRISPR-Cas9 fitness screens
|
||||
- 300+ cancer cell lines
|
||||
- Scaled essentiality scores (0-1)
|
||||
|
||||
**DepMap Portal**
|
||||
- Large-scale cancer dependency data
|
||||
- Genetic and pharmacological perturbations
|
||||
- Common essential genes identification
|
||||
|
||||
#### Essentiality Metrics:
|
||||
|
||||
- **Score range**: 0 (non-essential) to 1 (essential)
|
||||
- **Context**: Cell line specific vs. pan-essential
|
||||
- **Therapeutic window**: Selectivity between disease and normal cells
|
||||
|
||||
### 6. Chemical Probes and Tool Compounds
|
||||
|
||||
High-quality small molecules for target validation.
|
||||
|
||||
#### Sources:
|
||||
|
||||
**Probes & Drugs Portal**
|
||||
- Chemical probes with characterized selectivity
|
||||
- Quality ratings and annotations
|
||||
- Target engagement data
|
||||
|
||||
**Structural Genomics Consortium (SGC)**
|
||||
- Target Enabling Packages (TEPs)
|
||||
- Comprehensive target reagents
|
||||
- Freely available to academia
|
||||
|
||||
**Probe Criteria:**
|
||||
- Potency (typically IC50 < 100 nM)
|
||||
- Selectivity (>30-fold vs. off-targets)
|
||||
- Cell activity demonstrated
|
||||
- Negative control available
|
||||
|
||||
### 7. Pharmacogenetics
|
||||
|
||||
Genetic variants affecting drug response for drugs targeting the gene.
|
||||
|
||||
#### Data Source: ClinPGx
|
||||
|
||||
**Information Included:**
|
||||
- Variant-drug pairs
|
||||
- Clinical annotations (dosing, efficacy, toxicity)
|
||||
- Evidence level and sources
|
||||
- PharmGKB cross-references
|
||||
|
||||
**Clinical Utility:**
|
||||
- Dosing adjustments based on genotype
|
||||
- Contraindications for specific variants
|
||||
- Efficacy predictors
|
||||
|
||||
### 8. Genetic Constraint
|
||||
|
||||
Measures of negative selection against variants in the gene.
|
||||
|
||||
#### Data Source: gnomAD
|
||||
|
||||
**Metrics:**
|
||||
|
||||
**pLI (probability of Loss-of-function Intolerance)**
|
||||
- Range: 0-1
|
||||
- pLI > 0.9 indicates intolerance to LoF variants
|
||||
- High pLI suggests essentiality
|
||||
|
||||
**LOEUF (Loss-of-function Observed/Expected Upper bound Fraction)**
|
||||
- Lower values indicate greater constraint
|
||||
- More interpretable than pLI across its full range of values
|
||||
|
||||
**Missense Constraint**
|
||||
- Z-scores for missense depletion
|
||||
- O/E ratios for missense variants
|
||||
|
||||
**Interpretation:**
|
||||
- High constraint suggests important biological function
|
||||
- May indicate safety concerns if inhibited
|
||||
- Essential genes often show high constraint
|
||||
|
||||
### 9. Comparative Genomics
|
||||
|
||||
Cross-species gene conservation and ortholog information.
|
||||
|
||||
#### Data Source: Ensembl Compara
|
||||
|
||||
**Ortholog Data:**
|
||||
- Mouse, rat, zebrafish, other model organisms
|
||||
- Orthology confidence (1:1, 1:many, many:many)
|
||||
- Percent identity and similarity
|
||||
|
||||
**Utility:**
|
||||
- Transferability of findings from model organism studies
|
||||
- Functional conservation assessment
|
||||
- Evolution and selective pressure
|
||||
|
||||
### 10. Cancer Annotations
|
||||
|
||||
Cancer-specific target features for oncology indications.
|
||||
|
||||
#### Data Sources:
|
||||
|
||||
**Cancer Gene Census**
|
||||
- Role in cancer (oncogene, TSG, fusion)
|
||||
- Tier classification (1 = established, 2 = emerging)
|
||||
- Tumor types and mutation types
|
||||
|
||||
**Cancer Hallmarks**
|
||||
- Functional roles in cancer biology
|
||||
- Hallmarks: proliferation, apoptosis evasion, metastasis, etc.
|
||||
- Links to specific cancer processes
|
||||
|
||||
**Oncology Clinical Trials**
|
||||
- Drugs in development that target the gene for cancer indications
|
||||
- Trial phases and indications
|
||||
|
||||
### 11. Mouse Phenotypes
|
||||
|
||||
Phenotypes from mouse knockout/mutation studies.
|
||||
|
||||
#### Data Source: MGI (Mouse Genome Informatics)
|
||||
|
||||
**Phenotype Data:**
|
||||
- Knockout phenotypes
|
||||
- Disease model associations
|
||||
- Mammalian Phenotype Ontology (MP) terms
|
||||
|
||||
**Utility:**
|
||||
- Predict on-target effects
|
||||
- Safety liability identification
|
||||
- Mechanism of action insights
|
||||
|
||||
### 12. Pathways
|
||||
|
||||
Biological pathway annotations placing target in functional context.
|
||||
|
||||
#### Data Source: Reactome
|
||||
|
||||
**Pathway Information:**
|
||||
- Curated biological pathways
|
||||
- Hierarchical organization
|
||||
- Pathway diagrams with target position
|
||||
|
||||
**Applications:**
|
||||
- Mechanism hypothesis generation
|
||||
- Related target identification
|
||||
- Systems biology analysis
|
||||
|
||||
## Using Target Annotations in Queries
|
||||
|
||||
### Query Template: Comprehensive Target Profile
|
||||
|
||||
```python
|
||||
query = """
|
||||
query targetProfile($ensemblId: String!) {
|
||||
target(ensemblId: $ensemblId) {
|
||||
id
|
||||
approvedSymbol
|
||||
approvedName
|
||||
biotype
|
||||
|
||||
# Tractability
|
||||
tractability {
|
||||
label
|
||||
modality
|
||||
value
|
||||
}
|
||||
|
||||
# Safety
|
||||
safetyLiabilities {
|
||||
event
|
||||
effects {
|
||||
dosing
|
||||
organsAffected
|
||||
}
|
||||
}
|
||||
|
||||
# Expression
|
||||
expressions {
|
||||
tissue {
|
||||
label
|
||||
}
|
||||
rna {
|
||||
value
|
||||
level
|
||||
}
|
||||
protein {
|
||||
level
|
||||
}
|
||||
}
|
||||
|
||||
# Chemical probes
|
||||
chemicalProbes {
|
||||
id
|
||||
probeminer
|
||||
origin
|
||||
}
|
||||
|
||||
# Known drugs
|
||||
knownDrugs {
|
||||
uniqueDrugs
|
||||
rows {
|
||||
drug {
|
||||
name
|
||||
maximumClinicalTrialPhase
|
||||
}
|
||||
phase
|
||||
status
|
||||
}
|
||||
}
|
||||
|
||||
# Genetic constraint
|
||||
geneticConstraint {
|
||||
constraintType
|
||||
score
|
||||
exp
|
||||
obs
|
||||
}
|
||||
|
||||
# Pathways
|
||||
pathways {
|
||||
pathway
|
||||
pathwayId
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
variables = {"ensemblId": "ENSG00000157764"}
|
||||
```
|
||||
|
||||
## Annotation Interpretation Guidelines
|
||||
|
||||
### For Target Prioritization:
|
||||
|
||||
1. **Druggability (Tractability):**
|
||||
- Clinical precedence >> Discovery precedence > Predicted
|
||||
- Consider modality relevant to therapeutic approach
|
||||
- Check for existing tool compounds
|
||||
|
||||
2. **Safety Assessment:**
|
||||
- Review organ toxicity signals
|
||||
- Check expression in critical tissues
|
||||
- Assess genetic constraint (high = safety concern if inhibited)
|
||||
- Evaluate clinical adverse events from drugs
|
||||
|
||||
3. **Disease Relevance:**
|
||||
- Combine with association scores
|
||||
- Check expression in disease-relevant tissues
|
||||
- Review pathway context
|
||||
|
||||
4. **Validation Readiness:**
|
||||
- Chemical probes available?
|
||||
- Model organism data supportive?
|
||||
- Known drugs provide mechanism insight?
|
||||
|
||||
5. **Clinical Path Considerations:**
|
||||
- Pharmacogenetic factors
|
||||
- Expression pattern (tissue-specific is better for selectivity)
|
||||
- Essentiality (non-essential better for safety)
|
||||
|
||||
### Red Flags:
|
||||
|
||||
- **High essentiality + ubiquitous expression** - Poor therapeutic window
|
||||
- **Multiple safety liabilities** - Toxicity concerns
|
||||
- **High genetic constraint (pLI > 0.9)** - Critical gene, inhibition may be harmful
|
||||
- **No tractability precedence** - Higher risk, longer development
|
||||
- **Conflicting evidence** - Requires deeper investigation
|
||||
|
||||
### Green Flags:
|
||||
|
||||
- **Clinical precedence + related indication** - De-risked mechanism
|
||||
- **Tissue-specific expression** - Better selectivity
|
||||
- **Chemical probes available** - Faster validation
|
||||
- **Low essentiality + disease relevance** - Good therapeutic window
|
||||
- **Multiple evidence types converge** - Higher confidence
|
||||
@@ -0,0 +1,403 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Open Targets Platform GraphQL Query Helper
|
||||
|
||||
This script provides reusable functions for querying the Open Targets Platform
|
||||
GraphQL API. Use these functions to retrieve target, disease, drug, and
|
||||
association data.
|
||||
|
||||
Dependencies: requests (pip install requests)
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
|
||||
# API endpoint
|
||||
BASE_URL = "https://api.platform.opentargets.org/api/v4/graphql"
|
||||
|
||||
|
||||
def execute_query(query: str, variables: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Execute a GraphQL query against the Open Targets Platform API.

    Args:
        query: GraphQL query string
        variables: Optional dictionary of variables for the query

    Returns:
        Dictionary containing the response's "data" member, or an empty
        dictionary when that member is missing or null

    Raises:
        Exception: if the HTTP request fails, the body is not valid JSON,
            or the response contains an "errors" member
    """
    payload: Dict[str, Any] = {"query": query}
    if variables:
        payload["variables"] = variables

    try:
        response = requests.post(BASE_URL, json=payload, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass. Chain the original
        # error so the traceback keeps the root cause.
        raise Exception(f"API request failed: {str(e)}") from e

    if "errors" in data:
        raise Exception(f"GraphQL errors: {data['errors']}")

    # `.get("data", {})` would pass an explicit null through as None.
    return data.get("data") or {}
|
||||
|
||||
|
||||
def search_entities(query_string: str, entity_types: Optional[List[str]] = None) -> List[Dict[str, Any]]:
    """
    Search for targets, diseases, or drugs by name or identifier.

    Args:
        query_string: Search term (e.g., "BRCA1", "alzheimer", "aspirin")
        entity_types: Optional list to filter by entity type ["target", "disease", "drug"]

    Returns:
        List of at most 10 search hits, each with id, entity type, name, and
        description; an empty list when the API returns no hits
    """
    # `index: 0` is spelled out explicitly: it is harmless if the schema treats
    # it as optional and necessary if the Pagination input declares it
    # non-null (TODO confirm against the current schema).
    query = """
    query search($queryString: String!, $entityNames: [String!]) {
      search(queryString: $queryString, entityNames: $entityNames, page: {index: 0, size: 10}) {
        hits {
          id
          entity
          name
          description
        }
      }
    }
    """

    variables: Dict[str, Any] = {"queryString": query_string}
    if entity_types:
        variables["entityNames"] = entity_types

    result = execute_query(query, variables)
    # Guard against explicit nulls, which `.get(key, default)` does not replace.
    return (result.get("search") or {}).get("hits") or []
|
||||
|
||||
|
||||
def get_target_info(ensembl_id: str, include_diseases: bool = False) -> Dict[str, Any]:
    """
    Retrieve core annotations for a target gene.

    Args:
        ensembl_id: Ensembl gene ID (e.g., "ENSG00000157764")
        include_diseases: Whether to also request the first 10 associated
            diseases with their overall and per-datatype scores

    Returns:
        Dictionary with the target's identifiers, function descriptions,
        tractability, safety liabilities, and genetic constraint (plus
        "associatedDiseases" when requested); an empty dictionary when the
        API returns no target for the ID
    """
    # Optional sub-selection spliced into the main query below.
    # `index: 0` is explicit in case the Pagination input requires it
    # (TODO confirm against the current schema).
    disease_fragment = """
        associatedDiseases(page: {index: 0, size: 10}) {
          rows {
            disease {
              id
              name
            }
            score
            datatypeScores {
              componentId
              score
            }
          }
        }
    """ if include_diseases else ""

    # NOTE(review): the sub-fields selected under `safetyLiabilities` should be
    # checked against the current schema before relying on this query.
    query = f"""
    query targetInfo($ensemblId: String!) {{
      target(ensemblId: $ensemblId) {{
        id
        approvedSymbol
        approvedName
        biotype
        functionDescriptions

        tractability {{
          label
          modality
          value
        }}

        safetyLiabilities {{
          event
          effects {{
            dosing
            organsAffected
          }}
          biosamples {{
            tissue {{
              label
            }}
          }}
        }}

        geneticConstraint {{
          constraintType
          score
          exp
          obs
        }}

        {disease_fragment}
      }}
    }}
    """

    result = execute_query(query, {"ensemblId": ensembl_id})
    # An unknown ID comes back as an explicit null; normalise it to {} so the
    # declared return type holds and callers can use `.get(...)` safely.
    return result.get("target") or {}
|
||||
|
||||
|
||||
def get_disease_info(efo_id: str, include_targets: bool = False) -> Dict[str, Any]:
    """
    Retrieve information about a disease.

    Args:
        efo_id: EFO disease identifier (e.g., "EFO_0000249")
        include_targets: Whether to also request the first 10 associated
            targets with their overall and per-datatype scores

    Returns:
        Dictionary with the disease id, name, description, therapeutic
        areas, and synonyms (plus "associatedTargets" when requested); an
        empty dictionary when the API returns no disease for the ID
    """
    # Optional sub-selection spliced into the main query below.
    # `index: 0` is explicit in case the Pagination input requires it
    # (TODO confirm against the current schema).
    target_fragment = """
        associatedTargets(page: {index: 0, size: 10}) {
          rows {
            target {
              id
              approvedSymbol
              approvedName
            }
            score
            datatypeScores {
              componentId
              score
            }
          }
        }
    """ if include_targets else ""

    query = f"""
    query diseaseInfo($efoId: String!) {{
      disease(efoId: $efoId) {{
        id
        name
        description
        therapeuticAreas {{
          id
          name
        }}
        synonyms {{
          terms
        }}
        {target_fragment}
      }}
    }}
    """

    result = execute_query(query, {"efoId": efo_id})
    # An unknown ID comes back as an explicit null; normalise it to {}.
    return result.get("disease") or {}
|
||||
|
||||
|
||||
def get_target_disease_evidence(ensembl_id: str, efo_id: str,
                                data_types: Optional[List[str]] = None) -> List[Dict[str, Any]]:
    """
    Retrieve evidence linking a target to a disease.

    Args:
        ensembl_id: Ensembl gene ID
        efo_id: EFO disease identifier
        data_types: Optional filter for evidence types (e.g., ["genetic_association", "known_drug"])

    Returns:
        List of up to 100 evidence records with scores and sources; an empty
        list when the disease is unknown or no evidence rows are returned
    """
    # NOTE(review): confirm that `evidences` accepts a `datatypes` argument in
    # the current schema; it may only expose a datasource-level filter.
    query = """
    query evidences($ensemblId: String!, $efoId: String!, $dataTypes: [String!]) {
      disease(efoId: $efoId) {
        evidences(ensemblIds: [$ensemblId], datatypes: $dataTypes, size: 100) {
          rows {
            datasourceId
            datatypeId
            score
            targetFromSourceId
            studyId
            literature
            cohortPhenotypes
          }
        }
      }
    }
    """

    variables: Dict[str, Any] = {"ensemblId": ensembl_id, "efoId": efo_id}
    if data_types:
        variables["dataTypes"] = data_types

    result = execute_query(query, variables)
    # Each level may be an explicit null (e.g. unknown disease), which
    # `.get(key, {})` would pass through and then fail on.
    disease = result.get("disease") or {}
    evidences = disease.get("evidences") or {}
    return evidences.get("rows") or []
|
||||
|
||||
|
||||
def get_known_drugs_for_disease(efo_id: str) -> Dict[str, Any]:
    """
    Get drugs known to be used for a disease.

    Args:
        efo_id: EFO disease identifier

    Returns:
        Dictionary with "uniqueDrugs", "uniqueTargets", and "rows" (drug,
        targets, phase, status, mechanism of action); an empty dictionary
        when the disease is unknown or has no known-drug data
    """
    # NOTE(review): confirm the per-row `targets` selection against the current
    # schema; the row type may expose a single `target` instead.
    query = """
    query knownDrugs($efoId: String!) {
      disease(efoId: $efoId) {
        knownDrugs {
          uniqueDrugs
          uniqueTargets
          rows {
            drug {
              id
              name
              drugType
              maximumClinicalTrialPhase
            }
            targets {
              id
              approvedSymbol
            }
            phase
            status
            mechanismOfAction
          }
        }
      }
    }
    """

    result = execute_query(query, {"efoId": efo_id})
    # Guard both levels against explicit nulls so callers always get a dict.
    disease = result.get("disease") or {}
    return disease.get("knownDrugs") or {}
|
||||
|
||||
|
||||
def get_drug_info(chembl_id: str) -> Dict[str, Any]:
    """
    Retrieve information about a drug.

    Args:
        chembl_id: ChEMBL identifier (e.g., "CHEMBL25")

    Returns:
        Dictionary with the drug's names, type, maximum clinical trial phase,
        withdrawal details, mechanisms of action, and indications; an empty
        dictionary when the API returns no drug for the ID
    """
    # NOTE(review): confirm against the current schema that
    # `mechanismsOfAction` and `indications` expose these fields directly
    # rather than under a `rows` wrapper.
    query = """
    query drugInfo($chemblId: String!) {
      drug(chemblId: $chemblId) {
        id
        name
        synonyms
        drugType
        maximumClinicalTrialPhase
        hasBeenWithdrawn
        withdrawnNotice {
          reasons
          countries
        }
        mechanismsOfAction {
          actionType
          mechanismOfAction
          targetName
          targets {
            id
            approvedSymbol
          }
        }
        indications {
          disease
          efoId
          maxPhaseForIndication
        }
      }
    }
    """

    result = execute_query(query, {"chemblId": chembl_id})
    # An unknown ID comes back as an explicit null; normalise it to {}.
    return result.get("drug") or {}
|
||||
|
||||
|
||||
def get_target_associations(ensembl_id: str, min_score: float = 0.0) -> List[Dict[str, Any]]:
    """
    Get disease associations for a target, filtered by minimum score.

    Only a single page of up to 100 associations is requested, so the filter
    applies to those rows rather than to every association of the target.

    Args:
        ensembl_id: Ensembl gene ID
        min_score: Minimum association score (0-1) to include

    Returns:
        List of disease associations (disease id/name, overall score, and
        per-datatype scores) whose score is at least min_score; an empty
        list when the target is unknown or has no associations
    """
    # `index: 0` is explicit in case the Pagination input requires it
    # (TODO confirm against the current schema).
    query = """
    query targetAssociations($ensemblId: String!) {
      target(ensemblId: $ensemblId) {
        associatedDiseases(page: {index: 0, size: 100}) {
          count
          rows {
            disease {
              id
              name
            }
            score
            datatypeScores {
              componentId
              score
            }
          }
        }
      }
    }
    """

    result = execute_query(query, {"ensemblId": ensembl_id})
    # Each level may be an explicit null (e.g. unknown target), which
    # `.get(key, {})` would pass through and then fail on.
    target = result.get("target") or {}
    associations = (target.get("associatedDiseases") or {}).get("rows") or []

    # Filter by minimum score; a missing or null score is treated as 0.
    return [assoc for assoc in associations if (assoc.get("score") or 0) >= min_score]
|
||||
|
||||
|
||||
# Example usage
|
||||
def main() -> None:
    """Run three example lookups against the live API and print the results."""
    # Example 1: Search for a gene
    print("Searching for BRCA1...")
    results = search_entities("BRCA1", entity_types=["target"])
    for hit in results[:3]:
        print(f" {hit['name']} ({hit['id']})")

    # Example 2: Get target information
    if results:
        ensembl_id = results[0]['id']
        print(f"\nGetting info for {ensembl_id}...")
        # `or {}` keeps the example from crashing if the lookup yields nothing.
        target_info = get_target_info(ensembl_id, include_diseases=True) or {}
        print(f" Symbol: {target_info.get('approvedSymbol')}")
        print(f" Name: {target_info.get('approvedName')}")

        # Show top diseases
        diseases = (target_info.get('associatedDiseases') or {}).get('rows') or []
        if diseases:
            print("\n Top associated diseases:")
            for disease in diseases[:3]:
                print(f" - {disease['disease']['name']} (score: {disease['score']:.2f})")

    # Example 3: Search for a disease
    print("\n\nSearching for Alzheimer's disease...")
    disease_results = search_entities("alzheimer", entity_types=["disease"])
    if disease_results:
        efo_id = disease_results[0]['id']
        print(f" Found: {disease_results[0]['name']} ({efo_id})")

        # Get known drugs
        print(f"\n Known drugs for {disease_results[0]['name']}:")
        drugs = get_known_drugs_for_disease(efo_id) or {}
        for drug in (drugs.get('rows') or [])[:5]:
            print(f" - {drug['drug']['name']} (Phase {drug['phase']})")


if __name__ == "__main__":
    main()
|
||||
597
scientific-databases/uspto-database/SKILL.md
Normal file
597
scientific-databases/uspto-database/SKILL.md
Normal file
@@ -0,0 +1,597 @@
|
||||
---
|
||||
name: uspto-database
|
||||
description: Access and analyze United States Patent and Trademark Office (USPTO) data including patent searches, trademark lookups, patent examination history, office actions, assignments, citations, and litigation records. Use this skill when working with US patents or trademarks, analyzing patent prosecution, tracking IP ownership, conducting prior art searches, monitoring trademark status, retrieving patent examination data from PEDS, searching patent citations, or analyzing patent portfolios. Supports PatentSearch API (ElasticSearch-based patent search), PEDS (Patent Examination Data System), TSDR (Trademark Status & Document Retrieval), Patent/Trademark Assignment APIs, Office Action APIs, PTAB proceedings, and patent litigation data.
|
||||
---
|
||||
|
||||
# USPTO Database
|
||||
|
||||
## Overview
|
||||
|
||||
Access comprehensive United States Patent and Trademark Office data through multiple specialized APIs. This skill enables patent and trademark searching, retrieval of examination history, analysis of citations and office actions, tracking of assignments and ownership, and access to litigation records.
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
Use this skill for tasks involving:
|
||||
|
||||
- **Patent Search**: Finding patents by keywords, inventors, assignees, classifications, or dates
|
||||
- **Patent Details**: Retrieving full patent data including claims, abstracts, citations
|
||||
- **Trademark Search**: Looking up trademarks by serial or registration number
|
||||
- **Trademark Status**: Checking trademark status, ownership, and prosecution history
|
||||
- **Examination History**: Accessing patent prosecution data from PEDS (Patent Examination Data System)
|
||||
- **Office Actions**: Retrieving office action text, citations, and rejections
|
||||
- **Assignments**: Tracking patent/trademark ownership transfers
|
||||
- **Citations**: Analyzing patent citations (forward and backward)
|
||||
- **Litigation**: Accessing patent litigation records
|
||||
- **Portfolio Analysis**: Analyzing patent/trademark portfolios for companies or inventors
|
||||
|
||||
## USPTO API Ecosystem
|
||||
|
||||
The USPTO provides multiple specialized APIs for different data needs:
|
||||
|
||||
### Core APIs
|
||||
|
||||
1. **PatentSearch API** - Modern ElasticSearch-based patent search (replaced the legacy PatentsView API in May 2025)
|
||||
- Search patents by keywords, inventors, assignees, classifications, dates
|
||||
- Access to patent data through June 30, 2025
|
||||
- 45 requests/minute rate limit
|
||||
- **Base URL**: `https://search.patentsview.org/api/v1/`
|
||||
|
||||
2. **PEDS (Patent Examination Data System)** - Patent examination history
|
||||
- Application status and transaction history from 1981-present
|
||||
- Office action dates and examination events
|
||||
- Use `uspto-opendata-python` Python library
|
||||
- **Replaced**: PAIR Bulk Data (PBD) - decommissioned
|
||||
|
||||
3. **TSDR (Trademark Status & Document Retrieval)** - Trademark data
|
||||
- Trademark status, ownership, prosecution history
|
||||
- Search by serial or registration number
|
||||
- **Base URL**: `https://tsdrapi.uspto.gov/ts/cd/`
|
||||
|
||||
### Additional APIs
|
||||
|
||||
4. **Patent Assignment Search** - Ownership records and transfers
|
||||
5. **Trademark Assignment Search** - Trademark ownership changes
|
||||
6. **Enriched Citation API** - Patent citation analysis
|
||||
7. **Office Action Text Retrieval** - Full text of office actions
|
||||
8. **Office Action Citations** - Citations from office actions
|
||||
9. **Office Action Rejection** - Rejection reasons and types
|
||||
10. **PTAB API** - Patent Trial and Appeal Board proceedings
|
||||
11. **Patent Litigation Cases** - Federal district court litigation data
|
||||
12. **Cancer Moonshot Data Set** - Cancer-related patents
|
||||
|
||||
## Quick Start
|
||||
|
||||
### API Key Registration
|
||||
|
||||
All USPTO APIs require an API key. Register at:
|
||||
**https://account.uspto.gov/api-manager/**
|
||||
|
||||
Set the API key as an environment variable:
|
||||
```bash
|
||||
export USPTO_API_KEY="your_api_key_here"
|
||||
```
|
||||
|
||||
### Helper Scripts
|
||||
|
||||
This skill includes Python scripts for common operations:
|
||||
|
||||
- **`scripts/patent_search.py`** - PatentSearch API client for searching patents
|
||||
- **`scripts/peds_client.py`** - PEDS client for examination history
|
||||
- **`scripts/trademark_client.py`** - TSDR client for trademark data
|
||||
|
||||
## Task 1: Searching Patents
|
||||
|
||||
### Using the PatentSearch API
|
||||
|
||||
The PatentSearch API uses a JSON query language with various operators for flexible searching.
|
||||
|
||||
#### Basic Patent Search Examples
|
||||
|
||||
**Search by keywords in abstract:**
|
||||
```python
|
||||
from scripts.patent_search import PatentSearchClient
|
||||
|
||||
client = PatentSearchClient()
|
||||
|
||||
# Search for machine learning patents
|
||||
results = client.search_patents({
|
||||
"patent_abstract": {"_text_all": ["machine", "learning"]}
|
||||
})
|
||||
|
||||
for patent in results['patents']:
|
||||
print(f"{patent['patent_number']}: {patent['patent_title']}")
|
||||
```
|
||||
|
||||
**Search by inventor:**
|
||||
```python
|
||||
results = client.search_by_inventor("John Smith")
|
||||
```
|
||||
|
||||
**Search by assignee/company:**
|
||||
```python
|
||||
results = client.search_by_assignee("Google")
|
||||
```
|
||||
|
||||
**Search by date range:**
|
||||
```python
|
||||
results = client.search_by_date_range("2024-01-01", "2024-12-31")
|
||||
```
|
||||
|
||||
**Search by CPC classification:**
|
||||
```python
|
||||
results = client.search_by_classification("H04N") # Video/image tech
|
||||
```
|
||||
|
||||
#### Advanced Patent Search
|
||||
|
||||
Combine multiple criteria with logical operators:
|
||||
|
||||
```python
|
||||
results = client.advanced_search(
|
||||
keywords=["artificial", "intelligence"],
|
||||
assignee="Microsoft",
|
||||
start_date="2023-01-01",
|
||||
end_date="2024-12-31",
|
||||
cpc_codes=["G06N", "G06F"] # AI and computing classifications
|
||||
)
|
||||
```
|
||||
|
||||
#### Direct API Usage
|
||||
|
||||
For complex queries, use the API directly:
|
||||
|
||||
```python
|
||||
import os

import requests

url = "https://search.patentsview.org/api/v1/patent"
headers = {
    # Read the key from the USPTO_API_KEY environment variable set in Quick Start
    "X-Api-Key": os.environ["USPTO_API_KEY"],
    "Content-Type": "application/json"
}

query = {
    "q": {
        "_and": [
            {"patent_date": {"_gte": "2024-01-01"}},
            {"assignee_organization": {"_text_any": ["Google", "Alphabet"]}},
            {"cpc_subclass_id": ["G06N", "H04N"]}
        ]
    },
    "f": ["patent_number", "patent_title", "patent_date", "inventor_name"],
    "s": [{"patent_date": "desc"}],
    "o": {"per_page": 100, "page": 1}
}

# A timeout keeps the call from hanging indefinitely on a stalled connection
response = requests.post(url, headers=headers, json=query, timeout=30)
# Surface HTTP error statuses instead of trying to parse an error body as results
response.raise_for_status()
results = response.json()
|
||||
```
|
||||
|
||||
### Query Operators
|
||||
|
||||
- **Equality**: `{"field": "value"}` or `{"field": {"_eq": "value"}}`
|
||||
- **Comparison**: `_gt`, `_gte`, `_lt`, `_lte`, `_neq`
|
||||
- **Text search**: `_text_all`, `_text_any`, `_text_phrase`
|
||||
- **String matching**: `_begins`, `_contains`
|
||||
- **Logical**: `_and`, `_or`, `_not`
|
||||
|
||||
**Best Practice**: Use `_text_*` operators for text fields (more performant than `_contains` or `_begins`)
|
||||
|
||||
### Available Patent Endpoints
|
||||
|
||||
- `/patent` - Granted patents
|
||||
- `/publication` - Pregrant publications
|
||||
- `/inventor` - Inventor information
|
||||
- `/assignee` - Assignee information
|
||||
- `/cpc_subclass`, `/cpc_at_issue` - CPC classifications
|
||||
- `/uspc` - US Patent Classification
|
||||
- `/ipc` - International Patent Classification
|
||||
- `/claims`, `/brief_summary_text`, `/detail_description_text` - Text data (beta)
|
||||
|
||||
### Reference Documentation
|
||||
|
||||
See `references/patentsearch_api.md` for complete PatentSearch API documentation including:
|
||||
- All available endpoints
|
||||
- Complete field reference
|
||||
- Query syntax and examples
|
||||
- Response formats
|
||||
- Rate limits and best practices
|
||||
|
||||
## Task 2: Retrieving Patent Examination Data
|
||||
|
||||
### Using PEDS (Patent Examination Data System)
|
||||
|
||||
PEDS provides comprehensive prosecution history including transaction events, status changes, and examination timeline.
|
||||
|
||||
#### Installation
|
||||
|
||||
```bash
|
||||
pip install uspto-opendata-python
|
||||
```
|
||||
|
||||
#### Basic PEDS Usage
|
||||
|
||||
**Get application data:**
|
||||
```python
|
||||
from scripts.peds_client import PEDSHelper
|
||||
|
||||
helper = PEDSHelper()
|
||||
|
||||
# By application number
|
||||
app_data = helper.get_application("16123456")
|
||||
print(f"Title: {app_data['title']}")
|
||||
print(f"Status: {app_data['app_status']}")
|
||||
|
||||
# By patent number
|
||||
patent_data = helper.get_patent("11234567")
|
||||
```
|
||||
|
||||
**Get transaction history:**
|
||||
```python
|
||||
transactions = helper.get_transaction_history("16123456")
|
||||
|
||||
for trans in transactions:
|
||||
print(f"{trans['date']}: {trans['code']} - {trans['description']}")
|
||||
```
|
||||
|
||||
**Get office actions:**
|
||||
```python
|
||||
office_actions = helper.get_office_actions("16123456")
|
||||
|
||||
for oa in office_actions:
|
||||
if oa['code'] == 'CTNF':
|
||||
print(f"Non-final rejection: {oa['date']}")
|
||||
elif oa['code'] == 'CTFR':
|
||||
print(f"Final rejection: {oa['date']}")
|
||||
elif oa['code'] == 'NOA':
|
||||
print(f"Notice of allowance: {oa['date']}")
|
||||
```
|
||||
|
||||
**Get status summary:**
|
||||
```python
|
||||
summary = helper.get_status_summary("16123456")
|
||||
|
||||
print(f"Current status: {summary['current_status']}")
|
||||
print(f"Filing date: {summary['filing_date']}")
|
||||
print(f"Pendency: {summary['pendency_days']} days")
|
||||
|
||||
if summary['is_patented']:
|
||||
print(f"Patent number: {summary['patent_number']}")
|
||||
print(f"Issue date: {summary['issue_date']}")
|
||||
```
|
||||
|
||||
#### Prosecution Analysis
|
||||
|
||||
Analyze prosecution patterns:
|
||||
|
||||
```python
|
||||
analysis = helper.analyze_prosecution("16123456")
|
||||
|
||||
print(f"Total office actions: {analysis['total_office_actions']}")
|
||||
print(f"Non-final rejections: {analysis['non_final_rejections']}")
|
||||
print(f"Final rejections: {analysis['final_rejections']}")
|
||||
print(f"Allowed: {analysis['allowance']}")
|
||||
print(f"Responses filed: {analysis['responses']}")
|
||||
```
|
||||
|
||||
### Common Transaction Codes
|
||||
|
||||
- **CTNF** - Non-final rejection mailed
|
||||
- **CTFR** - Final rejection mailed
|
||||
- **NOA** - Notice of allowance mailed
|
||||
- **WRIT** - Response filed
|
||||
- **ISS.FEE** - Issue fee payment
|
||||
- **ABND** - Application abandoned
|
||||
- **AOPF** - Office action mailed
|
||||
|
||||
### Reference Documentation
|
||||
|
||||
See `references/peds_api.md` for complete PEDS documentation including:
|
||||
- All available data fields
|
||||
- Transaction code reference
|
||||
- Python library usage
|
||||
- Portfolio analysis examples
|
||||
|
||||
## Task 3: Searching and Monitoring Trademarks
|
||||
|
||||
### Using TSDR (Trademark Status & Document Retrieval)
|
||||
|
||||
Access trademark status, ownership, and prosecution history.
|
||||
|
||||
#### Basic Trademark Usage
|
||||
|
||||
**Get trademark by serial number:**
|
||||
```python
|
||||
from scripts.trademark_client import TrademarkClient
|
||||
|
||||
client = TrademarkClient()
|
||||
|
||||
# By serial number
|
||||
tm_data = client.get_trademark_by_serial("87654321")
|
||||
|
||||
# By registration number
|
||||
tm_data = client.get_trademark_by_registration("5678901")
|
||||
```
|
||||
|
||||
**Get trademark status:**
|
||||
```python
|
||||
status = client.get_trademark_status("87654321")
|
||||
|
||||
print(f"Mark: {status['mark_text']}")
|
||||
print(f"Status: {status['status']}")
|
||||
print(f"Filing date: {status['filing_date']}")
|
||||
|
||||
if status['is_registered']:
|
||||
print(f"Registration #: {status['registration_number']}")
|
||||
print(f"Registration date: {status['registration_date']}")
|
||||
```
|
||||
|
||||
**Check trademark health:**
|
||||
```python
|
||||
health = client.check_trademark_health("87654321")
|
||||
|
||||
print(f"Mark: {health['mark']}")
|
||||
print(f"Status: {health['status']}")
|
||||
|
||||
for alert in health['alerts']:
|
||||
print(alert)
|
||||
|
||||
if health['needs_attention']:
|
||||
print("⚠️ This mark needs attention!")
|
||||
```
|
||||
|
||||
#### Trademark Portfolio Monitoring
|
||||
|
||||
Monitor multiple trademarks:
|
||||
|
||||
```python
|
||||
def monitor_portfolio(serial_numbers, api_key):
    """Monitor trademark portfolio health.

    Args:
        serial_numbers: Iterable of trademark serial numbers to check.
        api_key: API key handed to ``TrademarkClient``.

    Returns:
        Dict with three lists of health records:

        - ``'active'``: status contains ``REGISTERED``
        - ``'pending'``: status contains ``PENDING`` or ``PUBLISHED``
        - ``'problems'``: ``needs_attention`` is set

        A record may appear both in a status list and in ``'problems'``.
    """
    client = TrademarkClient(api_key)

    results = {
        'active': [],
        'pending': [],
        'problems': [],
    }

    for sn in serial_numbers:
        health = client.check_trademark_health(sn)
        status = health['status']

        if 'REGISTERED' in status:
            results['active'].append(health)
        elif 'PENDING' in status or 'PUBLISHED' in status:
            results['pending'].append(health)

        # Checked independently of the status bucket. As the final ``elif``
        # of the chain above, this test was skipped for every registered or
        # pending mark, so flagged marks in those states were never reported.
        if health['needs_attention']:
            results['problems'].append(health)

    return results
|
||||
```
|
||||
|
||||
### Common Trademark Statuses
|
||||
|
||||
- **REGISTERED** - Active registered mark
|
||||
- **PENDING** - Under examination
|
||||
- **PUBLISHED FOR OPPOSITION** - In opposition period
|
||||
- **ABANDONED** - Application abandoned
|
||||
- **CANCELLED** - Registration cancelled
|
||||
- **SUSPENDED** - Examination suspended
|
||||
- **REGISTERED AND RENEWED** - Registration renewed
|
||||
|
||||
### Reference Documentation
|
||||
|
||||
See `references/trademark_api.md` for complete trademark API documentation including:
|
||||
- TSDR API reference
|
||||
- Trademark Assignment Search API
|
||||
- All status codes
|
||||
- Prosecution history access
|
||||
- Ownership tracking
|
||||
|
||||
## Task 4: Tracking Assignments and Ownership
|
||||
|
||||
### Patent and Trademark Assignments
|
||||
|
||||
Both patents and trademarks have Assignment Search APIs for tracking ownership changes.
|
||||
|
||||
#### Patent Assignment API
|
||||
|
||||
**Base URL**: `https://assignment-api.uspto.gov/patent/v1.4/`
|
||||
|
||||
**Search by patent number:**
|
||||
```python
|
||||
import requests
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
def get_patent_assignments(patent_number, api_key):
    """Fetch the assignment records for a patent as raw XML.

    Args:
        patent_number: Patent number to look up, e.g. ``"11234567"``.
        api_key: API key sent in the ``X-Api-Key`` header.

    Returns:
        The response body (XML text) on HTTP 200, otherwise ``None``.
    """
    url = f"https://assignment-api.uspto.gov/patent/v1.4/assignment/patent/{patent_number}"
    headers = {"X-Api-Key": api_key}

    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        return response.text  # Returns XML
    return None


assignments_xml = get_patent_assignments("11234567", api_key)

# get_patent_assignments returns None for any non-200 response; feeding that
# to ET.fromstring raises a TypeError, so handle the failure explicitly.
if assignments_xml is None:
    print("No assignment data returned")
else:
    root = ET.fromstring(assignments_xml)

    for assignment in root.findall('.//assignment'):
        # findtext returns the default instead of raising AttributeError when
        # a record lacks one of these elements.
        recorded_date = assignment.findtext('recordedDate', default='N/A')
        assignor = assignment.findtext('.//assignor/name', default='N/A')
        assignee = assignment.findtext('.//assignee/name', default='N/A')
        conveyance = assignment.findtext('conveyanceText', default='N/A')

        print(f"{recorded_date}: {assignor} → {assignee}")
        print(f" Type: {conveyance}\n")
|
||||
```
|
||||
|
||||
**Search by company name:**
|
||||
```python
|
||||
def find_company_patents(company_name, api_key):
    """Search patent assignments by assignee name.

    Sends a POST to the Patent Assignment search endpoint with
    ``company_name`` as the ``assigneeName`` criterion and returns the
    response body as text. The HTTP status is not inspected.
    """
    search_url = "https://assignment-api.uspto.gov/patent/v1.4/assignment/search"
    payload = {"criteria": {"assigneeName": company_name}}

    reply = requests.post(
        search_url,
        headers={"X-Api-Key": api_key},
        json=payload,
    )
    return reply.text
|
||||
```
|
||||
|
||||
### Common Assignment Types
|
||||
|
||||
- **ASSIGNMENT OF ASSIGNORS INTEREST** - Ownership transfer
|
||||
- **SECURITY AGREEMENT** - Collateral/security interest
|
||||
- **MERGER** - Corporate merger
|
||||
- **CHANGE OF NAME** - Name change
|
||||
- **ASSIGNMENT OF PARTIAL INTEREST** - Partial ownership
|
||||
|
||||
## Task 5: Accessing Additional USPTO Data
|
||||
|
||||
### Office Actions, Citations, and Litigation
|
||||
|
||||
Multiple specialized APIs provide additional patent data.
|
||||
|
||||
#### Office Action Text Retrieval
|
||||
|
||||
Retrieve the full text of office actions by application number. Use PEDS first to identify which office actions exist, then retrieve their full text.
|
||||
|
||||
#### Enriched Citation API
|
||||
|
||||
Analyze patent citations:
|
||||
- Forward citations (patents citing this patent)
|
||||
- Backward citations (prior art cited)
|
||||
- Examiner vs. applicant citations
|
||||
- Citation context
|
||||
|
||||
#### Patent Litigation Cases API
|
||||
|
||||
Access federal district court patent litigation records:
|
||||
- 74,623+ litigation records
|
||||
- Patents asserted
|
||||
- Parties and venues
|
||||
- Case outcomes
|
||||
|
||||
#### PTAB API
|
||||
|
||||
Patent Trial and Appeal Board proceedings:
|
||||
- Inter partes review (IPR)
|
||||
- Post-grant review (PGR)
|
||||
- Appeal decisions
|
||||
|
||||
### Reference Documentation
|
||||
|
||||
See `references/additional_apis.md` for comprehensive documentation on:
|
||||
- Enriched Citation API
|
||||
- Office Action APIs (Text, Citations, Rejections)
|
||||
- Patent Litigation Cases API
|
||||
- PTAB API
|
||||
- Cancer Moonshot Data Set
|
||||
- OCE Status/Event Codes
|
||||
|
||||
## Complete Analysis Example
|
||||
|
||||
### Comprehensive Patent Analysis
|
||||
|
||||
Combine multiple APIs for complete patent intelligence:
|
||||
|
||||
```python
|
||||
def comprehensive_patent_analysis(patent_number, api_key):
    """
    Full patent analysis using multiple USPTO APIs.

    Collects patent details (PatentSearch), prosecution analysis and status
    summary (PEDS), and the raw assignment XML, prints a short report, and
    returns everything that was collected.

    Args:
        patent_number: Patent number to analyze.
        api_key: API key used for PatentSearch and the Assignment API.

    Returns:
        Dict with keys ``'patent'``, ``'prosecution'``, ``'status'`` and
        ``'assignments'`` (XML text, or ``None`` on a non-200 response).
    """
    from scripts.patent_search import PatentSearchClient
    from scripts.peds_client import PEDSHelper

    results = {}

    # 1. Get patent details
    patent_client = PatentSearchClient(api_key)
    patent_data = patent_client.get_patent(patent_number)
    results['patent'] = patent_data

    # 2. Get examination history
    # NOTE(review): the PEDS examples in this guide pass an application
    # number to these helpers; confirm they also accept a patent number.
    peds = PEDSHelper()
    results['prosecution'] = peds.analyze_prosecution(patent_number)
    results['status'] = peds.get_status_summary(patent_number)

    # 3. Get assignment history
    import requests
    assign_url = f"https://assignment-api.uspto.gov/patent/v1.4/assignment/patent/{patent_number}"
    assign_resp = requests.get(assign_url, headers={"X-Api-Key": api_key})
    results['assignments'] = assign_resp.text if assign_resp.status_code == 200 else None

    # 4. Analyze results
    print(f"\n=== Patent {patent_number} Analysis ===\n")
    print(f"Title: {patent_data['patent_title']}")
    print(f"Assignee: {', '.join(patent_data.get('assignee_organization', []))}")
    print(f"Issue Date: {patent_data['patent_date']}")

    prosecution = results['prosecution']
    print(f"\nProsecution:")
    print(f" Office Actions: {prosecution['total_office_actions']}")
    print(f" Rejections: {prosecution['non_final_rejections']} non-final, {prosecution['final_rejections']} final")
    # pendency_days is a status-summary field in this guide's PEDS examples;
    # the prosecution-analysis example does not show it.
    print(f" Pendency: {results['status']['pendency_days']} days")

    # Analyze citations
    if 'cited_patent_number' in patent_data:
        print(f"\nCitations:")
        print(f" Cites: {len(patent_data['cited_patent_number'])} patents")
    if 'citedby_patent_number' in patent_data:
        print(f" Cited by: {len(patent_data['citedby_patent_number'])} patents")

    return results
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **API Key Management**
|
||||
- Store API key in environment variables
|
||||
- Never commit keys to version control
|
||||
- Use the same key across USPTO APIs (a single key works for most of them)
|
||||
|
||||
2. **Rate Limiting**
|
||||
- PatentSearch: 45 requests/minute
|
||||
- Implement exponential backoff for rate limit errors
|
||||
- Cache responses when possible
|
||||
|
||||
3. **Query Optimization**
|
||||
- Use `_text_*` operators for text fields (more performant)
|
||||
- Request only needed fields to reduce response size
|
||||
- Use date ranges to narrow searches
|
||||
|
||||
4. **Data Handling**
|
||||
- Not all fields are populated for all patents/trademarks
|
||||
- Handle missing data gracefully
|
||||
- Parse dates consistently
|
||||
|
||||
5. **Combining APIs**
|
||||
- Use PatentSearch for discovery
|
||||
- Use PEDS for prosecution details
|
||||
- Use Assignment APIs for ownership tracking
|
||||
- Combine data for comprehensive analysis
|
||||
|
||||
## Important Notes
|
||||
|
||||
- **Legacy API Sunset**: PatentsView legacy API discontinued May 1, 2025 - use PatentSearch API
|
||||
- **PAIR Bulk Data Decommissioned**: Use PEDS instead
|
||||
- **Data Coverage**: PatentSearch has data through June 30, 2025; PEDS from 1981-present
|
||||
- **Text Endpoints**: Claims and description endpoints are in beta with ongoing backfilling
|
||||
- **Rate Limits**: Respect rate limits to avoid service disruptions
|
||||
|
||||
## Resources
|
||||
|
||||
### API Documentation
|
||||
- **PatentSearch API**: https://search.patentsview.org/docs/
|
||||
- **USPTO Developer Portal**: https://developer.uspto.gov/
|
||||
- **USPTO Open Data Portal**: https://data.uspto.gov/
|
||||
- **API Key Registration**: https://account.uspto.gov/api-manager/
|
||||
|
||||
### Python Libraries
|
||||
- **uspto-opendata-python**: https://pypi.org/project/uspto-opendata-python/
|
||||
- **USPTO Docs**: https://docs.ip-tools.org/uspto-opendata-python/
|
||||
|
||||
### Reference Files
|
||||
- `references/patentsearch_api.md` - Complete PatentSearch API reference
|
||||
- `references/peds_api.md` - PEDS API and library documentation
|
||||
- `references/trademark_api.md` - Trademark APIs (TSDR and Assignment)
|
||||
- `references/additional_apis.md` - Citations, Office Actions, Litigation, PTAB
|
||||
|
||||
### Scripts
|
||||
- `scripts/patent_search.py` - PatentSearch API client
|
||||
- `scripts/peds_client.py` - PEDS examination data client
|
||||
- `scripts/trademark_client.py` - Trademark search client
|
||||
@@ -0,0 +1,394 @@
|
||||
# Additional USPTO APIs Reference
|
||||
|
||||
## Overview
|
||||
|
||||
Beyond patent search, PEDS, and trademarks, USPTO provides specialized APIs for citations, office actions, assignments, litigation, and other patent data.
|
||||
|
||||
## 1. Enriched Citation API
|
||||
|
||||
### Overview
|
||||
|
||||
Provides insights into patent evaluation processes and cited references for the IP5 (USPTO, EPO, JPO, KIPO, CNIPA) and public use.
|
||||
|
||||
**Versions:** v3, v2, v1
|
||||
|
||||
**Base URL:** Access through USPTO Open Data Portal
|
||||
|
||||
### Purpose
|
||||
|
||||
Analyze which references examiners cite during patent examination and how patents cite prior art.
|
||||
|
||||
### Key Features
|
||||
|
||||
- **Forward citations** - Patents that cite a given patent
|
||||
- **Backward citations** - References cited by a patent
|
||||
- **Examiner citations** - References cited by examiner vs. applicant
|
||||
- **Citation context** - How and why references are cited
|
||||
|
||||
### Use Cases
|
||||
|
||||
- Prior art analysis
|
||||
- Patent landscape analysis
|
||||
- Identifying related technologies
|
||||
- Assessing patent strength based on citations
|
||||
|
||||
## 2. Office Action APIs
|
||||
|
||||
### 2.1 Office Action Text Retrieval API
|
||||
|
||||
**Version:** v1
|
||||
|
||||
### Purpose
|
||||
|
||||
Retrieves complete full-text office action correspondence documents for patent applications.
|
||||
|
||||
### Features
|
||||
|
||||
- Full text of office actions
|
||||
- Restrictions, rejections, objections
|
||||
- Examiner amendments
|
||||
- Search information
|
||||
|
||||
### Example Use
|
||||
|
||||
```python
|
||||
# Retrieve office action text by application number
|
||||
def get_office_action_text(app_number, api_key):
    """Placeholder for fetching the full office action text of an application.

    The API call is not implemented here, so this always returns ``None``.
    Integrate with PEDS to identify which office actions exist.
    """
    return None
|
||||
```
|
||||
|
||||
### 2.2 Office Action Citations API
|
||||
|
||||
**Versions:** v2, beta v1
|
||||
|
||||
### Purpose
|
||||
|
||||
Provides patent citation data extracted from office actions, showing which references examiners used during examination.
|
||||
|
||||
### Key Data
|
||||
|
||||
- Patent and non-patent literature citations
|
||||
- Citation context (rejection, information, etc.)
|
||||
- Examiner search strategies
|
||||
- Prosecution research dataset
|
||||
|
||||
### 2.3 Office Action Rejection API
|
||||
|
||||
**Versions:** v2, beta v1
|
||||
|
||||
### Purpose
|
||||
|
||||
Provides rejection reasons and examination outcomes. Bulk rejection data is available through March 2025.
|
||||
|
||||
### Rejection Types
|
||||
|
||||
- **35 U.S.C. § 102** - Anticipation (lack of novelty)
|
||||
- **35 U.S.C. § 103** - Obviousness
|
||||
- **35 U.S.C. § 112** - Enablement, written description, indefiniteness
|
||||
- **35 U.S.C. § 101** - Subject matter eligibility
|
||||
|
||||
### Use Cases
|
||||
|
||||
- Analyze common rejection reasons
|
||||
- Identify problematic claim language
|
||||
- Prepare responses based on historical data
|
||||
- Portfolio analysis of rejection patterns
|
||||
|
||||
### 2.4 Office Action Weekly Zips API
|
||||
|
||||
**Version:** v1
|
||||
|
||||
### Purpose
|
||||
|
||||
Delivers bulk downloads of full-text office action documents organized by weekly release schedules.
|
||||
|
||||
### Features
|
||||
|
||||
- Weekly archive downloads
|
||||
- Complete office action text
|
||||
- Bulk access for large-scale analysis
|
||||
|
||||
## 3. Patent Assignment Search API
|
||||
|
||||
### Overview
|
||||
|
||||
**Version:** v1.4
|
||||
|
||||
Accesses USPTO patent assignment database for ownership records and transfers.
|
||||
|
||||
**Base URL:** `https://assignment-api.uspto.gov/patent/`
|
||||
|
||||
### Purpose
|
||||
|
||||
Track patent ownership, assignments, security interests, and corporate transactions.
|
||||
|
||||
### Search Methods
|
||||
|
||||
#### By Patent Number
|
||||
|
||||
```
|
||||
GET /v1.4/assignment/patent/{patent_number}
|
||||
```
|
||||
|
||||
#### By Application Number
|
||||
|
||||
```
|
||||
GET /v1.4/assignment/application/{application_number}
|
||||
```
|
||||
|
||||
#### By Assignee Name
|
||||
|
||||
```
|
||||
POST /v1.4/assignment/search
|
||||
{
|
||||
"criteria": {
|
||||
"assigneeName": "Company Name"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Response Format
|
||||
|
||||
Returns XML with assignment records similar to trademark assignments:
|
||||
|
||||
- Reel/frame numbers
|
||||
- Conveyance type
|
||||
- Dates (execution and recorded)
|
||||
- Assignors and assignees
|
||||
- Affected patents/applications
|
||||
|
||||
### Common Uses
|
||||
|
||||
```python
|
||||
def track_patent_ownership(patent_number, api_key):
    """Return the raw assignment XML for a patent, or ``None`` on a non-200 reply."""
    endpoint = f"https://assignment-api.uspto.gov/patent/v1.4/assignment/patent/{patent_number}"
    reply = requests.get(endpoint, headers={"X-Api-Key": api_key})

    if reply.status_code != 200:
        return None

    # The body is XML; extracting the assignment history is left to the caller.
    return reply.text
|
||||
|
||||
def find_company_patents(company_name, api_key):
    """Return the raw search response for assignments naming ``company_name`` as assignee.

    The response text is returned regardless of HTTP status.
    """
    criteria = {"criteria": {"assigneeName": company_name}}
    reply = requests.post(
        "https://assignment-api.uspto.gov/patent/v1.4/assignment/search",
        headers={"X-Api-Key": api_key},
        json=criteria,
    )
    return reply.text
|
||||
```
|
||||
|
||||
## 4. PTAB API (Patent Trial and Appeal Board)
|
||||
|
||||
### Overview
|
||||
|
||||
**Version:** v2
|
||||
|
||||
Access to Patent Trial and Appeal Board proceedings data.
|
||||
|
||||
### Purpose
|
||||
|
||||
Retrieve information about:
|
||||
- Inter partes review (IPR)
|
||||
- Post-grant review (PGR)
|
||||
- Covered business method (CBM) review
|
||||
- Ex parte appeals
|
||||
|
||||
### Data Available
|
||||
|
||||
- Petition information
|
||||
- Trial decisions
|
||||
- Final written decisions
|
||||
- Petitioner and patent owner information
|
||||
- Claims challenged
|
||||
- Trial outcomes
|
||||
|
||||
### Note
|
||||
|
||||
The PTAB API is currently migrating to the new Open Data Portal. Check the current documentation for access details.
|
||||
|
||||
## 5. Patent Litigation Cases API
|
||||
|
||||
### Overview
|
||||
|
||||
**Version:** v1
|
||||
|
||||
Contains 74,623+ district court litigation records covering patent litigation data.
|
||||
|
||||
### Purpose
|
||||
|
||||
Access federal district court patent infringement cases.
|
||||
|
||||
### Key Data
|
||||
|
||||
- Case numbers and filing dates
|
||||
- Patents asserted
|
||||
- Parties (plaintiffs and defendants)
|
||||
- Venues
|
||||
- Case outcomes
|
||||
|
||||
### Use Cases
|
||||
|
||||
- Litigation risk analysis
|
||||
- Identify frequently litigated patents
|
||||
- Track litigation trends
|
||||
- Analyze venue preferences
|
||||
- Assess patent enforcement patterns
|
||||
|
||||
## 6. Cancer Moonshot Patent Data Set API
|
||||
|
||||
### Overview
|
||||
|
||||
**Version:** v1.0.1
|
||||
|
||||
Specialized dataset for cancer-related patent discoveries.
|
||||
|
||||
### Purpose
|
||||
|
||||
Search and download patents related to cancer research, treatment, and diagnostics.
|
||||
|
||||
### Features
|
||||
|
||||
- Curated cancer-related patents
|
||||
- Bulk data download
|
||||
- Classification by cancer type
|
||||
- Treatment modality categorization
|
||||
|
||||
### Use Cases
|
||||
|
||||
- Cancer research prior art
|
||||
- Technology landscape analysis
|
||||
- Identify research trends
|
||||
- Licensing opportunities
|
||||
|
||||
## 7. OCE Patent Examination Status/Event Codes APIs
|
||||
|
||||
### Overview
|
||||
|
||||
**Version:** v1
|
||||
|
||||
Provides official descriptions of USPTO status and event codes used in patent examination.
|
||||
|
||||
### Purpose
|
||||
|
||||
Decode transaction codes and status codes found in PEDS and other examination data.
|
||||
|
||||
### Data Provided
|
||||
|
||||
- **Status codes** - Application status descriptions
|
||||
- **Event codes** - Transaction/event descriptions
|
||||
- **Code definitions** - Official meanings
|
||||
|
||||
### Integration
|
||||
|
||||
Use with PEDS data to interpret transaction codes:
|
||||
|
||||
```python
|
||||
def get_code_description(code, api_key):
    """Get human-readable description of USPTO code.

    Placeholder: the OCE lookup is not implemented, so this returns ``None``.
    """
    # Fetch from OCE API
    pass


def enrich_peds_data(peds_transactions, api_key):
    """Add descriptions to PEDS transaction codes.

    Args:
        peds_transactions: List of transaction dicts, each with a ``'code'``
            key. The dicts are updated in place.
        api_key: API key forwarded to ``get_code_description``.

    Returns:
        The same ``peds_transactions`` list, with ``'description'`` set on
        every entry.
    """
    # One lookup per distinct code keeps the number of API calls down.
    descriptions = {}

    for trans in peds_transactions:
        code = trans['code']
        if code not in descriptions:
            descriptions[code] = get_code_description(code, api_key)

        description = descriptions[code]
        if description is None:
            # Lookup produced nothing: keep whatever description the
            # transaction already had rather than overwriting it with None.
            description = trans.get('description')
        trans['description'] = description

    return peds_transactions
|
||||
```
|
||||
|
||||
## API Integration Patterns
|
||||
|
||||
### Combined Workflow Example
|
||||
|
||||
```python
|
||||
def comprehensive_patent_analysis(patent_number, api_key):
    """
    Collect data about one patent from several lookups.

    Runs six lookups for ``patent_number`` in order (patent details,
    examination history, assignments, citations, litigation, PTAB
    proceedings) and returns their results in a dict.
    """
    # A dict display evaluates its entries top to bottom, so the lookups run
    # in the order listed here.
    return {
        # 1. Patent details from PatentSearch
        'patent_data': search_patent(patent_number, api_key),
        # 2. Examination history from PEDS
        'prosecution': get_peds_data(patent_number, api_key),
        # 3. Assignment history
        'assignments': get_assignments(patent_number, api_key),
        # 4. Citation data
        'citations': get_citations(patent_number, api_key),
        # 5. Litigation history
        'litigation': get_litigation(patent_number, api_key),
        # 6. PTAB challenges
        'ptab': get_ptab_proceedings(patent_number, api_key),
    }
|
||||
```
|
||||
|
||||
### Portfolio Analysis Example
|
||||
|
||||
```python
|
||||
def analyze_company_portfolio(company_name, api_key):
    """
    Build a per-patent portfolio for a company and summarize it.

    Returns a dict with ``'portfolio'`` (one entry per patent number holding
    its details, citations and litigation lookups) and ``'statistics'``
    (aggregate counts plus technology areas).
    """
    # 1. Find the patents assigned to the company
    assignment_records = find_company_patents(company_name, api_key)
    numbers = extract_patent_numbers(assignment_records)

    # 2. Look up details, citations and litigation for each patent
    portfolio = [
        {
            'number': num,
            'details': search_patent(num, api_key),
            'citations': get_citations(num, api_key),
            'litigation': get_litigation(num, api_key),
        }
        for num in numbers
    ]

    # 3. Aggregate statistics
    total_patents = len(portfolio)
    cited_by_count = sum(len(entry['citations']) for entry in portfolio)
    litigated_count = sum(1 for entry in portfolio if entry['litigation'])
    technology_areas = aggregate_tech_areas(portfolio)

    stats = {
        'total_patents': total_patents,
        'cited_by_count': cited_by_count,
        'litigated_count': litigated_count,
        'technology_areas': technology_areas,
    }
    return {'portfolio': portfolio, 'statistics': stats}
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **API Key Management** - Use environment variables, never hardcode
|
||||
2. **Rate Limiting** - Implement exponential backoff for all APIs
|
||||
3. **Caching** - Cache API responses to minimize redundant calls
|
||||
4. **Error Handling** - Gracefully handle API errors and missing data
|
||||
5. **Data Validation** - Validate input formats before API calls
|
||||
6. **Combining APIs** - Use appropriate APIs together for comprehensive analysis
|
||||
7. **Documentation** - Keep track of API versions and changes
|
||||
|
||||
## API Key Registration
|
||||
|
||||
All APIs require registration at:
|
||||
**https://account.uspto.gov/api-manager/**
|
||||
|
||||
Single API key works across most USPTO APIs.
|
||||
|
||||
## Resources
|
||||
|
||||
- **Developer Portal**: https://developer.uspto.gov/
|
||||
- **Open Data Portal**: https://data.uspto.gov/
|
||||
- **API Catalog**: https://developer.uspto.gov/api-catalog
|
||||
- **Swagger Docs**: Available for individual APIs
|
||||
@@ -0,0 +1,266 @@
|
||||
# PatentSearch API Reference
|
||||
|
||||
## Overview
|
||||
|
||||
The PatentSearch API is USPTO's modern ElasticSearch-based patent search system that replaced the legacy PatentsView API in May 2025. It provides access to patent data through June 30, 2025, with regular updates.
|
||||
|
||||
**Base URL:** `https://search.patentsview.org/api/v1/`
|
||||
|
||||
## Authentication
|
||||
|
||||
All API requests require authentication using an API key in the request header:
|
||||
|
||||
```
|
||||
X-Api-Key: YOUR_API_KEY
|
||||
```
|
||||
|
||||
Register for an API key at: https://account.uspto.gov/api-manager/
|
||||
|
||||
## Rate Limits
|
||||
|
||||
- **45 requests per minute** per API key
|
||||
- Exceeding rate limits results in HTTP 429 errors
|
||||
|
||||
## Available Endpoints
|
||||
|
||||
### Core Patent & Publication Endpoints
|
||||
|
||||
- **`/patent`** - General patent data (granted patents)
|
||||
- **`/publication`** - Pregrant publication data
|
||||
- **`/publication/rel_app_text`** - Related application data for publications
|
||||
|
||||
### Entity Endpoints
|
||||
|
||||
- **`/inventor`** - Inventor information with location and gender code fields
|
||||
- **`/assignee`** - Assignee details with location identifiers
|
||||
- **`/location`** - Geographic data including latitude/longitude coordinates
|
||||
- **`/attorney`** - Legal representative information
|
||||
|
||||
### Classification Endpoints
|
||||
|
||||
- **`/cpc_subclass`** - Cooperative Patent Classification at subclass level
|
||||
- **`/cpc_at_issue`** - CPC classification as of patent issue date
|
||||
- **`/uspc`** - US Patent Classification data
|
||||
- **`/wipo`** - World Intellectual Property Organization classifications
|
||||
- **`/ipc`** - International Patent Classification
|
||||
|
||||
### Text Data Endpoints (Beta)
|
||||
|
||||
- **`/brief_summary_text`** - Patent brief summaries (granted and pre-grant)
|
||||
- **`/claims`** - Patent claims text
|
||||
- **`/drawing_description_text`** - Drawing descriptions
|
||||
- **`/detail_description_text`** - Detailed description text
|
||||
|
||||
*Note: Text endpoints are in beta with data primarily from 2023 onward. Historical backfilling is in progress.*
|
||||
|
||||
### Supporting Endpoints
|
||||
|
||||
- **`/other_reference`** - Patent reference materials
|
||||
- **`/related_document`** - Cross-references between patents
|
||||
|
||||
## Query Parameters
|
||||
|
||||
All endpoints support four main parameters:
|
||||
|
||||
### 1. Query String (`q`)
|
||||
|
||||
Filters data using JSON query objects. **Required parameter.**
|
||||
|
||||
**Query Operators:**
|
||||
|
||||
- **Equality**: `{"field": "value"}` or `{"field": {"_eq": "value"}}`
|
||||
- **Not equal**: `{"field": {"_neq": "value"}}`
|
||||
- **Comparison**: `_gt`, `_gte`, `_lt`, `_lte`
|
||||
- **String matching**:
|
||||
- `_begins` - starts with
|
||||
- `_contains` - substring match
|
||||
- **Full-text search** (recommended for text fields):
|
||||
- `_text_all` - all terms must match
|
||||
- `_text_any` - any term matches
|
||||
- `_text_phrase` - exact phrase match
|
||||
- **Logical operators**: `_and`, `_or`, `_not`
|
||||
- **Array matching**: Pass an array of values to match any of them (implicit OR), e.g. `{"cpc_subclass_id": ["H04N", "H04L"]}`
|
||||
|
||||
**Examples:**
|
||||
|
||||
```json
|
||||
// Simple equality
|
||||
{"patent_number": "11234567"}
|
||||
|
||||
// Date range
|
||||
{"patent_date": {"_gte": "2020-01-01", "_lte": "2020-12-31"}}
|
||||
|
||||
// Text search (preferred for text fields)
|
||||
{"patent_abstract": {"_text_all": ["machine", "learning"]}}
|
||||
|
||||
// Inventor name
|
||||
{"inventor_name": {"_text_phrase": "John Smith"}}
|
||||
|
||||
// Complex query with logical operators
|
||||
{
|
||||
"_and": [
|
||||
{"patent_date": {"_gte": "2020-01-01"}},
|
||||
{"assignee_organization": {"_text_any": ["Google", "Alphabet"]}}
|
||||
]
|
||||
}
|
||||
|
||||
// Array for OR conditions
|
||||
{"cpc_subclass_id": ["H04N", "H04L"]}
|
||||
```
|
||||
|
||||
### 2. Field List (`f`)
|
||||
|
||||
Specifies which fields to return in the response. Optional - each endpoint has default fields.
|
||||
|
||||
**Format:** JSON array of field names
|
||||
|
||||
```json
|
||||
["patent_number", "patent_title", "patent_date", "inventor_name"]
|
||||
```
|
||||
|
||||
### 3. Sorting (`s`)
|
||||
|
||||
Orders results by specified fields. Optional.
|
||||
|
||||
**Format:** JSON array with field name and direction
|
||||
|
||||
```json
|
||||
[{"patent_date": "desc"}]
|
||||
```
|
||||
|
||||
### 4. Options (`o`)
|
||||
|
||||
Controls pagination and additional settings. Optional.
|
||||
|
||||
**Available options:**
|
||||
|
||||
- `page` - Page number (default: 1)
|
||||
- `per_page` - Records per page (default: 100, max: 1,000)
|
||||
- `pad_patent_id` - Pad patent IDs with leading zeros (default: false)
|
||||
- `exclude_withdrawn` - Exclude withdrawn patents (default: true)
|
||||
|
||||
**Format:** JSON object
|
||||
|
||||
```json
|
||||
{
|
||||
"page": 1,
|
||||
"per_page": 500,
|
||||
"exclude_withdrawn": false
|
||||
}
|
||||
```
|
||||
|
||||
## Response Format
|
||||
|
||||
All responses follow this structure:
|
||||
|
||||
```json
|
||||
{
|
||||
"error": false,
|
||||
"count": 100,
|
||||
"total_hits": 5432,
|
||||
"patents": [...],
|
||||
// or "inventors": [...], "assignees": [...], etc.
|
||||
}
|
||||
```
|
||||
|
||||
- `error` - Boolean indicating if an error occurred
|
||||
- `count` - Number of records in current response
|
||||
- `total_hits` - Total number of matching records
|
||||
- Endpoint-specific data array (e.g., `patents`, `inventors`)
|
||||
|
||||
## Complete Request Example
|
||||
|
||||
### Using curl
|
||||
|
||||
```bash
|
||||
curl -X POST "https://search.patentsview.org/api/v1/patent" \
|
||||
-H "X-Api-Key: YOUR_API_KEY" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"q": {
|
||||
"_and": [
|
||||
{"patent_date": {"_gte": "2024-01-01"}},
|
||||
{"patent_abstract": {"_text_all": ["artificial", "intelligence"]}}
|
||||
]
|
||||
},
|
||||
"f": ["patent_number", "patent_title", "patent_date", "assignee_organization"],
|
||||
"s": [{"patent_date": "desc"}],
|
||||
"o": {"per_page": 100}
|
||||
}'
|
||||
```
|
||||
|
||||
### Using Python
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
url = "https://search.patentsview.org/api/v1/patent"
|
||||
headers = {
|
||||
"X-Api-Key": "YOUR_API_KEY",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
data = {
|
||||
"q": {
|
||||
"_and": [
|
||||
{"patent_date": {"_gte": "2024-01-01"}},
|
||||
{"patent_abstract": {"_text_all": ["artificial", "intelligence"]}}
|
||||
]
|
||||
},
|
||||
"f": ["patent_number", "patent_title", "patent_date", "assignee_organization"],
|
||||
"s": [{"patent_date": "desc"}],
|
||||
"o": {"per_page": 100}
|
||||
}
|
||||
|
||||
response = requests.post(url, headers=headers, json=data)
|
||||
results = response.json()
|
||||
```
|
||||
|
||||
## Common Field Names
|
||||
|
||||
### Patent Endpoint Fields
|
||||
|
||||
- `patent_number` - Patent number
|
||||
- `patent_title` - Title of the patent
|
||||
- `patent_date` - Grant date
|
||||
- `patent_abstract` - Abstract text
|
||||
- `patent_type` - Type of patent
|
||||
- `inventor_name` - Inventor names (array)
|
||||
- `assignee_organization` - Assignee company names (array)
|
||||
- `cpc_subclass_id` - CPC classification codes
|
||||
- `uspc_class` - US classification codes
|
||||
- `cited_patent_number` - Citations to other patents
|
||||
- `citedby_patent_number` - Patents citing this patent
|
||||
|
||||
Refer to the full field dictionary at: https://search.patentsview.org/docs/
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Use `_text*` operators for text fields** - More performant than `_contains` or `_begins`
|
||||
2. **Request only needed fields** - Reduces response size and improves performance
|
||||
3. **Implement pagination** - Handle large result sets efficiently
|
||||
4. **Respect rate limits** - Implement backoff/retry logic for 429 errors
|
||||
5. **Cache results** - Reduce redundant API calls
|
||||
6. **Use date ranges** - Narrow searches to improve performance
|
||||
|
||||
## Error Handling
|
||||
|
||||
Common HTTP status codes:
|
||||
|
||||
- **200** - Success
|
||||
- **400** - Bad request (invalid query syntax)
|
||||
- **401** - Unauthorized (missing or invalid API key)
|
||||
- **429** - Too many requests (rate limit exceeded)
|
||||
- **500** - Server error
|
||||
|
||||
## Recent Updates (February 2025)
|
||||
|
||||
- Data updated through December 31, 2024
|
||||
- New `pad_patent_id` option for formatting patent IDs
|
||||
- New `exclude_withdrawn` option to control whether withdrawn patents are returned (set to `false` to include them)
|
||||
- Text endpoints continue beta backfilling
|
||||
|
||||
## Resources
|
||||
|
||||
- **Official Documentation**: https://search.patentsview.org/docs/
|
||||
- **API Key Registration**: https://account.uspto.gov/api-manager/
|
||||
- **Legacy API Notice**: The old PatentsView API was discontinued May 1, 2025
|
||||
212
scientific-databases/uspto-database/references/peds_api.md
Normal file
212
scientific-databases/uspto-database/references/peds_api.md
Normal file
@@ -0,0 +1,212 @@
|
||||
# Patent Examination Data System (PEDS) API Reference
|
||||
|
||||
## Overview
|
||||
|
||||
The Patent Examination Data System (PEDS) provides access to USPTO patent application and filing status records. It contains bibliographic data, published document information, and patent term extension data.
|
||||
|
||||
**Data Coverage:** 1981 to present (some data back to 1935)
|
||||
|
||||
**Base URL:** Access through USPTO Open Data Portal
|
||||
|
||||
## What PEDS Provides
|
||||
|
||||
PEDS gives comprehensive transaction history and status information for patent applications:
|
||||
|
||||
- **Bibliographic data** - Application numbers, filing dates, titles, inventors, assignees
|
||||
- **Published documents** - Publication numbers and dates
|
||||
- **Transaction history** - All examination events with dates, codes, and descriptions
|
||||
- **Patent term adjustments** - PTA/PTE information
|
||||
- **Application status** - Current status and status codes
|
||||
- **File wrapper access** - Links to prosecution documents
|
||||
|
||||
## Key Features
|
||||
|
||||
1. **Transaction Activity** - Complete examination timeline with transaction dates, codes, and descriptions
|
||||
2. **Status Information** - Current application status and status codes
|
||||
3. **Bibliographic Updates** - Changes to inventors, assignees, titles over time
|
||||
4. **Family Data** - Related applications and continuity data
|
||||
5. **Office Action Tracking** - Mail dates and office action information
|
||||
|
||||
## Python Library: uspto-opendata-python
|
||||
|
||||
The recommended way to access PEDS is through the `uspto-opendata-python` library.
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
pip install uspto-opendata-python
|
||||
```
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uspto.peds import PEDSClient
|
||||
|
||||
# Initialize client
|
||||
client = PEDSClient()
|
||||
|
||||
# Search by application number
|
||||
app_number = "16123456"
|
||||
result = client.get_application(app_number)
|
||||
|
||||
# Access application data
|
||||
print(f"Title: {result['title']}")
|
||||
print(f"Filing Date: {result['filing_date']}")
|
||||
print(f"Status: {result['status']}")
|
||||
|
||||
# Get transaction history
|
||||
transactions = result['transactions']
|
||||
for trans in transactions:
|
||||
print(f"{trans['date']}: {trans['code']} - {trans['description']}")
|
||||
```
|
||||
|
||||
### Search Methods
|
||||
|
||||
```python
|
||||
# By application number
|
||||
client.get_application("16123456")
|
||||
|
||||
# By patent number
|
||||
client.get_patent("11234567")
|
||||
|
||||
# By customer number (assignee)
|
||||
client.search_by_customer_number("12345")
|
||||
|
||||
# Bulk retrieval
|
||||
app_numbers = ["16123456", "16123457", "16123458"]
|
||||
results = client.bulk_retrieve(app_numbers)
|
||||
```
|
||||
|
||||
## Data Fields
|
||||
|
||||
### Bibliographic Fields
|
||||
|
||||
- `application_number` - Application number
|
||||
- `filing_date` - Filing date
|
||||
- `patent_number` - Patent number (if granted)
|
||||
- `patent_issue_date` - Issue date (if granted)
|
||||
- `title` - Application/patent title
|
||||
- `inventors` - List of inventors
|
||||
- `assignees` - List of assignees
|
||||
- `app_type` - Application type (utility, design, plant, reissue)
|
||||
- `app_status` - Current application status
|
||||
- `app_status_date` - Status date
|
||||
|
||||
### Transaction Fields
|
||||
|
||||
- `transaction_date` - Date of transaction
|
||||
- `transaction_code` - USPTO event code
|
||||
- `transaction_description` - Description of event
|
||||
- `mail_date` - Mail room date (for office actions)
|
||||
|
||||
### Patent Term Data
|
||||
|
||||
- `pta_pte_summary` - Patent term adjustment/extension summary
|
||||
- `pta_pte_history` - History of term calculations
|
||||
|
||||
## Status Codes
|
||||
|
||||
Common application status codes:
|
||||
|
||||
- **Patented Case** - Patent has been granted
|
||||
- **Abandoned** - Application is abandoned
|
||||
- **Pending** - Application is under examination
|
||||
- **Allowed** - Application has been allowed, awaiting issue
|
||||
- **Final Rejection** - Final rejection issued
|
||||
- **Non-Final Rejection** - Non-final rejection issued
|
||||
- **Response Filed** - Applicant response filed
|
||||
|
||||
## Transaction Codes
|
||||
|
||||
Common transaction codes include:
|
||||
|
||||
- **CTNF** - Non-final rejection mailed
|
||||
- **CTFR** - Final rejection mailed
|
||||
- **AOPF** - Office action mailed
|
||||
- **WRIT** - Response filed
|
||||
- **NOA** - Notice of allowance mailed
|
||||
- **ISS.FEE** - Issue fee payment
|
||||
- **ABND** - Application abandoned
|
||||
|
||||
Full code list available in OCE Patent Examination Status/Event Codes API.
|
||||
|
||||
## Use Cases
|
||||
|
||||
### 1. Track Application Progress
|
||||
|
||||
Monitor pending applications for office actions and status changes.
|
||||
|
||||
```python
|
||||
# Get current status
|
||||
app = client.get_application("16123456")
|
||||
print(f"Current status: {app['app_status']}")
|
||||
print(f"Status date: {app['app_status_date']}")
|
||||
|
||||
# Check for recent office actions
|
||||
recent_oas = [t for t in app['transactions']
|
||||
if t['code'] in ['CTNF', 'CTFR', 'AOPF']
|
||||
and t['date'] > '2024-01-01']
|
||||
```
|
||||
|
||||
### 2. Portfolio Analysis
|
||||
|
||||
Analyze prosecution history across a portfolio.
|
||||
|
||||
```python
|
||||
# Get all applications for an assignee
|
||||
apps = client.search_by_customer_number("12345")
|
||||
|
||||
# Calculate average pendency
|
||||
pendencies = []
|
||||
for app in apps:
|
||||
if app['patent_issue_date']:
|
||||
filing = datetime.strptime(app['filing_date'], '%Y-%m-%d')
|
||||
issue = datetime.strptime(app['patent_issue_date'], '%Y-%m-%d')
|
||||
pendencies.append((issue - filing).days)
|
||||
|
||||
avg_pendency = sum(pendencies) / len(pendencies)
|
||||
print(f"Average pendency: {avg_pendency} days")
|
||||
```
|
||||
|
||||
### 3. Examine Rejection Patterns
|
||||
|
||||
Analyze types of rejections received.
|
||||
|
||||
```python
|
||||
# Count rejection types
|
||||
rejections = {}
|
||||
for trans in app['transactions']:
|
||||
if 'rejection' in trans['description'].lower():
|
||||
code = trans['code']
|
||||
rejections[code] = rejections.get(code, 0) + 1
|
||||
```
|
||||
|
||||
## Integration with Other APIs
|
||||
|
||||
PEDS data can be combined with other USPTO APIs:
|
||||
|
||||
- **Office Action Text API** - Retrieve full text of office actions using application number
|
||||
- **Patent Assignment Search** - Find ownership changes
|
||||
- **PTAB API** - Check for appeal proceedings
|
||||
|
||||
## Important Notes
|
||||
|
||||
1. **PAIR Bulk Data (PBD) is decommissioned** - Use PEDS instead
|
||||
2. **Data updates** - PEDS is updated regularly but may have a 1-2 day lag
|
||||
3. **Application numbers** - Use standardized format (no slashes or spaces)
|
||||
4. **Continuity data** - Parent/child applications tracked in transaction history
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Batch requests** - Use bulk retrieval for multiple applications
|
||||
2. **Cache data** - Avoid redundant API calls for same application
|
||||
3. **Monitor updates** - Check for transaction updates regularly
|
||||
4. **Handle missing data** - Not all fields populated for all applications
|
||||
5. **Parse transaction codes** - Use code descriptions for user-friendly display
|
||||
|
||||
## Resources
|
||||
|
||||
- **Library Documentation**: https://docs.ip-tools.org/uspto-opendata-python/
|
||||
- **PyPI Package**: https://pypi.org/project/uspto-opendata-python/
|
||||
- **GitHub Repository**: https://github.com/ip-tools/uspto-opendata-python
|
||||
- **USPTO PEDS Portal**: https://ped.uspto.gov/
|
||||
358
scientific-databases/uspto-database/references/trademark_api.md
Normal file
358
scientific-databases/uspto-database/references/trademark_api.md
Normal file
@@ -0,0 +1,358 @@
|
||||
# USPTO Trademark APIs Reference
|
||||
|
||||
## Overview
|
||||
|
||||
USPTO provides two main APIs for trademark data:
|
||||
|
||||
1. **Trademark Status & Document Retrieval (TSDR)** - Retrieve trademark case status and documents
|
||||
2. **Trademark Assignment Search** - Search trademark assignment records
|
||||
|
||||
## 1. Trademark Status & Document Retrieval (TSDR) API
|
||||
|
||||
### Overview
|
||||
|
||||
TSDR enables programmatic retrieval of trademark case status documents and information.
|
||||
|
||||
**API Version:** v1.0
|
||||
|
||||
**Base URL:** `https://tsdrapi.uspto.gov/ts/cd/`
|
||||
|
||||
### Authentication
|
||||
|
||||
Requires API key registration at: https://account.uspto.gov/api-manager/
|
||||
|
||||
Include API key in request header:
|
||||
```
|
||||
X-Api-Key: YOUR_API_KEY
|
||||
```
|
||||
|
||||
### Endpoints
|
||||
|
||||
#### Get Trademark Status by Serial Number
|
||||
|
||||
```
|
||||
GET /ts/cd/casedocs/sn{serial_number}/info.json
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
curl -H "X-Api-Key: YOUR_KEY" \
|
||||
"https://tsdrapi.uspto.gov/ts/cd/casedocs/sn87654321/info.json"
|
||||
```
|
||||
|
||||
#### Get Trademark Status by Registration Number
|
||||
|
||||
```
|
||||
GET /ts/cd/casedocs/rn{registration_number}/info.json
|
||||
```
|
||||
|
||||
### Response Format
|
||||
|
||||
Returns JSON with comprehensive trademark information:
|
||||
|
||||
```json
|
||||
{
|
||||
"TradeMarkAppln": {
|
||||
"ApplicationNumber": "87654321",
|
||||
"ApplicationDate": "2017-10-15",
|
||||
"RegistrationNumber": "5678901",
|
||||
"RegistrationDate": "2019-03-12",
|
||||
"MarkVerbalElementText": "EXAMPLE MARK",
|
||||
"MarkCurrentStatusExternalDescriptionText": "REGISTERED",
|
||||
"MarkCurrentStatusDate": "2019-03-12",
|
||||
"GoodsAndServices": [...],
|
||||
"Owners": [...],
|
||||
"Correspondents": [...]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Key Data Fields
|
||||
|
||||
- **Application Information:**
|
||||
- `ApplicationNumber` - Serial number
|
||||
- `ApplicationDate` - Filing date
|
||||
- `ApplicationType` - Type (TEAS Plus, TEAS Standard, etc.)
|
||||
|
||||
- **Registration Information:**
|
||||
- `RegistrationNumber` - Registration number (if registered)
|
||||
- `RegistrationDate` - Registration date
|
||||
|
||||
- **Mark Information:**
|
||||
- `MarkVerbalElementText` - Text of the mark
|
||||
- `MarkCurrentStatusExternalDescriptionText` - Current status
|
||||
- `MarkCurrentStatusDate` - Status date
|
||||
- `MarkDrawingCode` - Type of mark (words, design, etc.)
|
||||
|
||||
- **Classification:**
|
||||
- `GoodsAndServices` - Array of goods/services with classes
|
||||
|
||||
- **Owner Information:**
|
||||
- `Owners` - Array of trademark owners/applicants
|
||||
|
||||
- **Prosecution History:**
|
||||
- `ProsecutionHistoryEntry` - Array of events in prosecution
|
||||
|
||||
### Common Status Values
|
||||
|
||||
- **REGISTERED** - Mark is registered and active
|
||||
- **PENDING** - Application under examination
|
||||
- **ABANDONED** - Application/registration abandoned
|
||||
- **CANCELLED** - Registration cancelled
|
||||
- **SUSPENDED** - Examination suspended
|
||||
- **PUBLISHED FOR OPPOSITION** - Published, in opposition period
|
||||
- **REGISTERED AND RENEWED** - Registration renewed
|
||||
|
||||
### Python Example
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
def get_trademark_status(serial_number, api_key):
|
||||
"""Retrieve trademark status by serial number."""
|
||||
url = f"https://tsdrapi.uspto.gov/ts/cd/casedocs/sn{serial_number}/info.json"
|
||||
headers = {"X-Api-Key": api_key}
|
||||
|
||||
response = requests.get(url, headers=headers)
|
||||
if response.status_code == 200:
|
||||
return response.json()
|
||||
else:
|
||||
raise Exception(f"API error: {response.status_code}")
|
||||
|
||||
# Usage
|
||||
data = get_trademark_status("87654321", "YOUR_API_KEY")
|
||||
trademark = data['TradeMarkAppln']
|
||||
|
||||
print(f"Mark: {trademark['MarkVerbalElementText']}")
|
||||
print(f"Status: {trademark['MarkCurrentStatusExternalDescriptionText']}")
|
||||
print(f"Application Date: {trademark['ApplicationDate']}")
|
||||
if 'RegistrationNumber' in trademark:
|
||||
print(f"Registration #: {trademark['RegistrationNumber']}")
|
||||
```
|
||||
|
||||
## 2. Trademark Assignment Search API
|
||||
|
||||
### Overview
|
||||
|
||||
Retrieves trademark assignment records from the USPTO assignment database. Shows ownership transfers and security interests.
|
||||
|
||||
**API Version:** v1.4
|
||||
|
||||
**Base URL:** `https://assignment-api.uspto.gov/trademark/`
|
||||
|
||||
### Authentication
|
||||
|
||||
Requires API key in header:
|
||||
```
|
||||
X-Api-Key: YOUR_API_KEY
|
||||
```
|
||||
|
||||
### Search Methods
|
||||
|
||||
#### By Registration Number
|
||||
|
||||
```
|
||||
GET /v1.4/assignment/application/{registration_number}
|
||||
```
|
||||
|
||||
#### By Serial Number
|
||||
|
||||
```
|
||||
GET /v1.4/assignment/application/{serial_number}
|
||||
```
|
||||
|
||||
#### By Assignee Name
|
||||
|
||||
```
|
||||
POST /v1.4/assignment/search
|
||||
```
|
||||
|
||||
**Request body:**
|
||||
```json
|
||||
{
|
||||
"criteria": {
|
||||
"assigneeName": "Company Name"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Response Format
|
||||
|
||||
Returns XML containing assignment records:
|
||||
|
||||
```xml
|
||||
<assignments>
|
||||
<assignment>
|
||||
<reelFrame>12345/0678</reelFrame>
|
||||
<conveyanceText>ASSIGNMENT OF ASSIGNORS INTEREST</conveyanceText>
|
||||
<recordedDate>2020-01-15</recordedDate>
|
||||
<executionDate>2020-01-10</executionDate>
|
||||
<assignors>
|
||||
<assignor>
|
||||
<name>Original Owner LLC</name>
|
||||
</assignor>
|
||||
</assignors>
|
||||
<assignees>
|
||||
<assignee>
|
||||
<name>New Owner Corporation</name>
|
||||
</assignee>
|
||||
</assignees>
|
||||
</assignment>
|
||||
</assignments>
|
||||
```
|
||||
|
||||
### Key Fields
|
||||
|
||||
- `reelFrame` - USPTO reel and frame number
|
||||
- `conveyanceText` - Type of transaction
|
||||
- `recordedDate` - Date recorded at USPTO
|
||||
- `executionDate` - Date document was executed
|
||||
- `assignors` - Original owners
|
||||
- `assignees` - New owners
|
||||
- `propertyNumbers` - Affected serial/registration numbers
|
||||
|
||||
### Common Conveyance Types
|
||||
|
||||
- **ASSIGNMENT OF ASSIGNORS INTEREST** - Ownership transfer
|
||||
- **SECURITY AGREEMENT** - Collateral/security interest
|
||||
- **MERGER** - Corporate merger
|
||||
- **CHANGE OF NAME** - Name change
|
||||
- **ASSIGNMENT OF PARTIAL INTEREST** - Partial ownership transfer
|
||||
|
||||
### Python Example
|
||||
|
||||
```python
|
||||
import requests
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
def search_trademark_assignments(registration_number, api_key):
|
||||
"""Search assignments for a trademark registration."""
|
||||
url = f"https://assignment-api.uspto.gov/trademark/v1.4/assignment/application/{registration_number}"
|
||||
headers = {"X-Api-Key": api_key}
|
||||
|
||||
response = requests.get(url, headers=headers)
|
||||
if response.status_code == 200:
|
||||
return response.text # Returns XML
|
||||
else:
|
||||
raise Exception(f"API error: {response.status_code}")
|
||||
|
||||
# Usage
|
||||
xml_data = search_trademark_assignments("5678901", "YOUR_API_KEY")
|
||||
root = ET.fromstring(xml_data)
|
||||
|
||||
for assignment in root.findall('.//assignment'):
|
||||
reel_frame = assignment.find('reelFrame').text
|
||||
recorded_date = assignment.find('recordedDate').text
|
||||
conveyance = assignment.find('conveyanceText').text
|
||||
|
||||
assignor = assignment.find('.//assignor/name').text
|
||||
assignee = assignment.find('.//assignee/name').text
|
||||
|
||||
print(f"{recorded_date}: {assignor} -> {assignee}")
|
||||
print(f" Type: {conveyance}")
|
||||
print(f" Reel/Frame: {reel_frame}\n")
|
||||
```
|
||||
|
||||
## Use Cases
|
||||
|
||||
### 1. Monitor Trademark Status
|
||||
|
||||
Check status of pending applications or registrations:
|
||||
|
||||
```python
|
||||
def check_trademark_health(serial_number, api_key):
|
||||
"""Check if trademark needs attention."""
|
||||
data = get_trademark_status(serial_number, api_key)
|
||||
tm = data['TradeMarkAppln']
|
||||
|
||||
status = tm['MarkCurrentStatusExternalDescriptionText']
|
||||
alerts = []
|
||||
|
||||
if 'ABANDON' in status:
|
||||
alerts.append("⚠️ ABANDONED")
|
||||
elif 'PUBLISHED' in status:
|
||||
alerts.append("📢 In opposition period")
|
||||
elif 'SUSPENDED' in status:
|
||||
alerts.append("⏸️ Examination suspended")
|
||||
elif 'REGISTERED' in status:
|
||||
alerts.append("✅ Active")
|
||||
|
||||
return alerts
|
||||
```
|
||||
|
||||
### 2. Track Ownership Changes
|
||||
|
||||
Monitor assignment records for ownership changes:
|
||||
|
||||
```python
|
||||
def get_current_owner(registration_number, api_key):
|
||||
"""Find current trademark owner from assignment records."""
|
||||
xml_data = search_trademark_assignments(registration_number, api_key)
|
||||
root = ET.fromstring(xml_data)
|
||||
|
||||
assignments = []
|
||||
for assignment in root.findall('.//assignment'):
|
||||
date = assignment.find('recordedDate').text
|
||||
assignee = assignment.find('.//assignee/name').text
|
||||
assignments.append((date, assignee))
|
||||
|
||||
# Most recent assignment
|
||||
if assignments:
|
||||
assignments.sort(reverse=True)
|
||||
return assignments[0][1]
|
||||
return None
|
||||
```
|
||||
|
||||
### 3. Portfolio Management
|
||||
|
||||
Analyze trademark portfolio:
|
||||
|
||||
```python
|
||||
def analyze_portfolio(serial_numbers, api_key):
|
||||
"""Analyze status of multiple trademarks."""
|
||||
results = {
|
||||
'active': 0,
|
||||
'pending': 0,
|
||||
'abandoned': 0,
|
||||
'expired': 0
|
||||
}
|
||||
|
||||
for sn in serial_numbers:
|
||||
data = get_trademark_status(sn, api_key)
|
||||
status = data['TradeMarkAppln']['MarkCurrentStatusExternalDescriptionText']
|
||||
|
||||
if 'REGISTERED' in status:
|
||||
results['active'] += 1
|
||||
elif 'PENDING' in status or 'PUBLISHED' in status:
|
||||
results['pending'] += 1
|
||||
elif 'ABANDON' in status:
|
||||
results['abandoned'] += 1
|
||||
elif 'EXPIRED' in status or 'CANCELLED' in status:
|
||||
results['expired'] += 1
|
||||
|
||||
return results
|
||||
```
|
||||
|
||||
## Rate Limits and Best Practices
|
||||
|
||||
1. **Respect rate limits** - Implement retry logic with exponential backoff
|
||||
2. **Cache responses** - Trademark data changes infrequently
|
||||
3. **Batch processing** - Spread requests over time for large portfolios
|
||||
4. **Error handling** - Handle missing data gracefully (not all marks have all fields)
|
||||
5. **Data validation** - Verify serial/registration numbers before API calls
|
||||
|
||||
## Integration with Other Data
|
||||
|
||||
Combine trademark data with other sources:
|
||||
|
||||
- **TSDR + Assignment** - Current status + ownership history
|
||||
- **Multiple marks** - Analyze related marks in a family
|
||||
- **Patent data** - Cross-reference IP portfolio
|
||||
|
||||
## Resources
|
||||
|
||||
- **TSDR API**: https://developer.uspto.gov/api-catalog/tsdr-data-api
|
||||
- **Assignment API**: https://developer.uspto.gov/api-catalog/trademark-assignment-search-data-api
|
||||
- **API Key Registration**: https://account.uspto.gov/api-manager/
|
||||
- **Trademark Search**: https://tmsearch.uspto.gov/
|
||||
- **Swagger Documentation**: https://developer.uspto.gov/swagger/tsdr-api-v1
|
||||
290
scientific-databases/uspto-database/scripts/patent_search.py
Normal file
290
scientific-databases/uspto-database/scripts/patent_search.py
Normal file
@@ -0,0 +1,290 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
USPTO PatentSearch API Helper
|
||||
|
||||
Provides functions for searching and retrieving patent data using the USPTO
|
||||
PatentSearch API (ElasticSearch-based system, replaced legacy PatentsView in May 2025).
|
||||
|
||||
Requires:
|
||||
- requests library: pip install requests
|
||||
- USPTO API key from https://account.uspto.gov/api-manager/
|
||||
|
||||
Environment variables:
|
||||
USPTO_API_KEY - Your USPTO API key
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import requests
|
||||
from typing import Dict, List, Optional, Any
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class PatentSearchClient:
    """Client for the USPTO PatentSearch API.

    Every request is an authenticated JSON POST to ``BASE_URL/<endpoint>``.
    The ``search_by_*`` helpers only build a query dictionary and delegate to
    :meth:`search_patents`, which in turn delegates to :meth:`_request`.
    """

    BASE_URL = "https://search.patentsview.org/api/v1"

    def __init__(self, api_key: Optional[str] = None, timeout: float = 30.0):
        """
        Initialize client with API key.

        Args:
            api_key: USPTO API key (if not provided, uses USPTO_API_KEY env var)
            timeout: Seconds to wait for the server on each request before
                giving up (passed to ``requests.post``)

        Raises:
            ValueError: If no API key is passed and USPTO_API_KEY is unset/empty
        """
        self.api_key = api_key or os.getenv("USPTO_API_KEY")
        if not self.api_key:
            raise ValueError("API key required. Set USPTO_API_KEY environment variable or pass to constructor.")

        self.timeout = timeout
        self.headers = {
            "X-Api-Key": self.api_key,
            "Content-Type": "application/json",
        }

    def _request(self, endpoint: str, query: Dict, fields: Optional[List[str]] = None,
                 sort: Optional[List[Dict]] = None, options: Optional[Dict] = None) -> Dict:
        """
        Make a request to the PatentSearch API.

        Args:
            endpoint: API endpoint (e.g., "patent", "inventor")
            query: Query dictionary
            fields: List of fields to return
            sort: Sort specification
            options: Pagination and other options

        Returns:
            API response as dictionary

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status
            requests.Timeout: If the server does not respond within ``self.timeout``
        """
        url = f"{self.BASE_URL}/{endpoint}"

        # Only the query is mandatory; empty/None extras are left out of the payload.
        data: Dict[str, Any] = {"q": query}
        if fields:
            data["f"] = fields
        if sort:
            data["s"] = sort
        if options:
            data["o"] = options

        # Without an explicit timeout, requests waits indefinitely on a stalled connection.
        response = requests.post(url, headers=self.headers, json=data, timeout=self.timeout)
        response.raise_for_status()

        return response.json()

    def search_patents(self, query: Dict, fields: Optional[List[str]] = None,
                       sort: Optional[List[Dict]] = None, page: int = 1,
                       per_page: int = 100) -> Dict:
        """
        Search for patents.

        Args:
            query: Query dictionary (see PatentSearch API docs for syntax)
            fields: Fields to return (defaults to essential fields)
            sort: Sort specification (defaults to newest patent_date first)
            page: Page number
            per_page: Results per page (values above 1000 are capped at 1000)

        Returns:
            Search results with patents array

        Example:
            # Search by keyword
            results = client.search_patents({
                "patent_abstract": {"_text_all": ["machine", "learning"]}
            })

            # Search by date range
            results = client.search_patents({
                "patent_date": {"_gte": "2024-01-01", "_lte": "2024-12-31"}
            })
        """
        if fields is None:
            fields = [
                "patent_number", "patent_title", "patent_date",
                "patent_abstract", "assignee_organization",
                "inventor_name", "cpc_subclass_id",
            ]

        if sort is None:
            sort = [{"patent_date": "desc"}]

        options = {"page": page, "per_page": min(per_page, 1000)}

        return self._request("patent", query, fields, sort, options)

    def get_patent(self, patent_number: str) -> Optional[Dict]:
        """
        Get details for a specific patent by number.

        Args:
            patent_number: Patent number (with or without commas)

        Returns:
            Patent data dictionary or None if not found
        """
        # Remove commas from patent number ("7,654,321" -> "7654321")
        patent_number = patent_number.replace(",", "")

        query = {"patent_number": patent_number}
        fields = [
            "patent_number", "patent_title", "patent_date", "patent_abstract",
            "patent_type", "inventor_name", "assignee_organization",
            "cpc_subclass_id", "cited_patent_number", "citedby_patent_number",
        ]

        result = self._request("patent", query, fields)

        # A missing or empty "patents" array means no match.
        if result.get("patents"):
            return result["patents"][0]
        return None

    def search_by_inventor(self, inventor_name: str, **kwargs) -> Dict:
        """
        Search patents by inventor name.

        Args:
            inventor_name: Inventor name, matched as a phrase (_text_phrase)
            **kwargs: Additional parameters forwarded to search_patents

        Returns:
            Search results
        """
        query = {"inventor_name": {"_text_phrase": inventor_name}}
        return self.search_patents(query, **kwargs)

    def search_by_assignee(self, assignee_name: str, **kwargs) -> Dict:
        """
        Search patents by assignee/company name.

        Args:
            assignee_name: Assignee/company name; split on whitespace and
                matched with _text_any, so any single word is enough to match
            **kwargs: Additional parameters forwarded to search_patents

        Returns:
            Search results
        """
        query = {"assignee_organization": {"_text_any": assignee_name.split()}}
        return self.search_patents(query, **kwargs)

    def search_by_classification(self, cpc_code: str, **kwargs) -> Dict:
        """
        Search patents by CPC classification code.

        Args:
            cpc_code: CPC subclass code (e.g., "H04N", "G06F")
            **kwargs: Additional parameters forwarded to search_patents

        Returns:
            Search results
        """
        query = {"cpc_subclass_id": cpc_code}
        return self.search_patents(query, **kwargs)

    def search_by_date_range(self, start_date: str, end_date: str, **kwargs) -> Dict:
        """
        Search patents by date range.

        Args:
            start_date: Start date (YYYY-MM-DD)
            end_date: End date (YYYY-MM-DD)
            **kwargs: Additional parameters forwarded to search_patents

        Returns:
            Search results
        """
        query = {
            "patent_date": {
                "_gte": start_date,
                "_lte": end_date,
            }
        }
        return self.search_patents(query, **kwargs)

    def advanced_search(self, keywords: List[str], assignee: Optional[str] = None,
                        start_date: Optional[str] = None, end_date: Optional[str] = None,
                        cpc_codes: Optional[List[str]] = None, **kwargs) -> Dict:
        """
        Perform advanced search with multiple criteria.

        All supplied criteria are combined with ``_and``. Either date bound may
        be given on its own to get an open-ended range.

        Args:
            keywords: List of keywords that must all appear in the abstract
            assignee: Assignee/company name (any word may match)
            start_date: Earliest patent date, inclusive (YYYY-MM-DD)
            end_date: Latest patent date, inclusive (YYYY-MM-DD)
            cpc_codes: List of CPC classification codes
            **kwargs: Additional parameters forwarded to search_patents

        Returns:
            Search results

        Raises:
            ValueError: If no search criterion at all is supplied
        """
        conditions = []

        # Keyword search in abstract
        if keywords:
            conditions.append({
                "patent_abstract": {"_text_all": keywords}
            })

        # Assignee filter
        if assignee:
            conditions.append({
                "assignee_organization": {"_text_any": assignee.split()}
            })

        # Date range: build from whichever bounds are present so a single
        # bound is not silently ignored.
        date_filter = {}
        if start_date:
            date_filter["_gte"] = start_date
        if end_date:
            date_filter["_lte"] = end_date
        if date_filter:
            conditions.append({"patent_date": date_filter})

        # CPC classification
        if cpc_codes:
            conditions.append({
                "cpc_subclass_id": cpc_codes
            })

        if not conditions:
            raise ValueError("At least one search criterion is required.")

        # A lone condition is sent as-is; several are wrapped in "_and".
        query = {"_and": conditions} if len(conditions) > 1 else conditions[0]

        return self.search_patents(query, **kwargs)
|
||||
|
||||
|
||||
def main():
    """Command-line interface for patent search.

    Reads ``sys.argv``: either a bare patent number, or one of ``--inventor``,
    ``--assignee`` or ``--keywords`` followed by at least one value. Prints the
    results as indented JSON on stdout.

    Exits with status 1 after printing usage when the arguments are missing or
    incomplete, when the patent is not found, or when any error occurs (the
    error text goes to stderr).
    """
    args = sys.argv[1:]
    value_flags = ("--inventor", "--assignee", "--keywords")

    # No arguments, or a flag without its value: show usage instead of
    # sending an empty query to the API.
    if not args or (args[0] in value_flags and len(args) < 2):
        print("Usage:")
        print("  python patent_search.py <patent_number>")
        print("  python patent_search.py --inventor <name>")
        print("  python patent_search.py --assignee <company>")
        print("  python patent_search.py --keywords <word1> <word2> ...")
        sys.exit(1)

    try:
        # Built inside the try so a missing API key is reported through the
        # same "Error: ..." path as request failures rather than a traceback.
        client = PatentSearchClient()

        if args[0] == "--inventor":
            results = client.search_by_inventor(" ".join(args[1:]))
        elif args[0] == "--assignee":
            results = client.search_by_assignee(" ".join(args[1:]))
        elif args[0] == "--keywords":
            query = {"patent_abstract": {"_text_all": args[1:]}}
            results = client.search_patents(query)
        else:
            # Assume patent number
            patent = client.get_patent(args[0])
            if patent:
                # Wrap the single record so the output has the same shape as a search.
                results = {"patents": [patent], "count": 1, "total_hits": 1}
            else:
                print(f"Patent {args[0]} not found")
                sys.exit(1)

        # Print results
        print(json.dumps(results, indent=2))

    except Exception as e:
        # Top-level CLI boundary; sys.exit() above raises SystemExit, which
        # is not an Exception subclass and therefore passes through.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
253
scientific-databases/uspto-database/scripts/peds_client.py
Normal file
253
scientific-databases/uspto-database/scripts/peds_client.py
Normal file
@@ -0,0 +1,253 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
USPTO Patent Examination Data System (PEDS) Helper
|
||||
|
||||
Provides functions for retrieving patent examination data using the
|
||||
uspto-opendata-python library.
|
||||
|
||||
Requires:
|
||||
- uspto-opendata-python: pip install uspto-opendata-python
|
||||
|
||||
Note: This script provides a simplified interface to PEDS data.
|
||||
For full functionality, use the uspto-opendata-python library directly.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
from typing import Dict, List, Optional, Any
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
from uspto.peds import PEDSClient as OriginalPEDSClient
|
||||
HAS_USPTO_LIB = True
|
||||
except ImportError:
|
||||
HAS_USPTO_LIB = False
|
||||
print("Warning: uspto-opendata-python not installed.", file=sys.stderr)
|
||||
print("Install with: pip install uspto-opendata-python", file=sys.stderr)
|
||||
|
||||
|
||||
class PEDSHelper:
    """Helper class for accessing PEDS data.

    Thin convenience layer over the client object created from
    ``OriginalPEDSClient``. All lookups go through ``get_application`` /
    ``get_patent``; the remaining methods derive summaries from the returned
    dictionary.

    NOTE(review): ``_format_application_data`` is still a pass-through, so
    the keys read by this class (``filing_date``, ``app_status``,
    ``app_status_date``, ``patent_number``, ``patent_issue_date``, ``title``,
    ``inventors``, ``assignees``, ``transactions`` and each transaction's
    ``code``) are assumed to exist in the library payload - confirm against
    uspto-opendata-python.
    """

    # Transaction codes that get_office_actions() reports.
    # NOTE(review): analyze_prosecution() counts 'AOPF' and 'OA' as office
    # actions and treats 'NOA' as an allowance rather than an office action;
    # the two code sets are intentionally left as they were - confirm which
    # one is intended.
    _OFFICE_ACTION_CODES = ('CTNF', 'CTFR', 'AOPF', 'NOA')

    def __init__(self):
        """Initialize PEDS client.

        Raises:
            ImportError: If the uspto-opendata-python import failed when the
                module was loaded.
        """
        if not HAS_USPTO_LIB:
            raise ImportError("uspto-opendata-python library required")
        self.client = OriginalPEDSClient()

    def get_application(self, application_number: str) -> Optional[Dict]:
        """
        Get patent application data by application number.

        Args:
            application_number: Application number (e.g., "16123456")

        Returns:
            The application data returned by the underlying client, passed
            through ``_format_application_data``; ``None`` if the client
            raised (the error is reported on stderr).
        """
        try:
            result = self.client.get_application(application_number)
            return self._format_application_data(result)
        except Exception as e:
            # Deliberate best-effort: callers treat None as "no data".
            print(f"Error retrieving application {application_number}: {e}", file=sys.stderr)
            return None

    def get_patent(self, patent_number: str) -> Optional[Dict]:
        """
        Get patent data by patent number.

        Args:
            patent_number: Patent number (e.g., "11234567")

        Returns:
            Patent data dictionary, or ``None`` if the client raised (the
            error is reported on stderr).
        """
        try:
            result = self.client.get_patent(patent_number)
            return self._format_application_data(result)
        except Exception as e:
            # Deliberate best-effort: callers treat None as "no data".
            print(f"Error retrieving patent {patent_number}: {e}", file=sys.stderr)
            return None

    def get_transaction_history(self, application_number: str) -> List[Dict]:
        """
        Get transaction history for an application.

        Args:
            application_number: Application number

        Returns:
            The ``transactions`` list of the application data; an empty list
            when the application could not be retrieved or carries no
            transactions.
        """
        return self._transactions_of(self.get_application(application_number))

    def get_office_actions(self, application_number: str) -> List[Dict]:
        """
        Get office actions for an application.

        Args:
            application_number: Application number

        Returns:
            The transactions whose ``code`` is one of ``CTNF``, ``CTFR``,
            ``AOPF`` or ``NOA``, in their original order.
        """
        transactions = self.get_transaction_history(application_number)
        return [
            trans for trans in transactions
            if trans.get('code') in self._OFFICE_ACTION_CODES
        ]

    def get_status_summary(self, application_number: str) -> Dict[str, Any]:
        """
        Get a summary of application status.

        Args:
            application_number: Application number

        Returns:
            Empty dict when the application could not be retrieved, otherwise
            a dictionary with:
            - current_status: Value of ``app_status``
            - filing_date: Value of ``filing_date``
            - status_date: Value of ``app_status_date``
            - is_patented: True when ``patent_number`` is not None
            - patent_number: Value of ``patent_number``
            - issue_date: Value of ``patent_issue_date``
            - pendency_days: Days between the filing date and now, or None
              when the filing date is missing or not in YYYY-MM-DD form
            - title: Value of ``title``
            - inventors: Value of ``inventors`` (default empty list)
            - assignees: Value of ``assignees`` (default empty list)
        """
        app_data = self.get_application(application_number)
        if not app_data:
            return {}
        return self._summarize(app_data)

    def analyze_prosecution(self, application_number: str) -> Dict[str, Any]:
        """
        Analyze prosecution history.

        The application is fetched once and both the transaction counts and
        the status fields are derived from that single response.

        Args:
            application_number: Application number

        Returns:
            Empty dict when there are no transactions, otherwise a dictionary
            with:
            - total_office_actions: Count of CTNF, CTFR, AOPF and OA codes
            - non_final_rejections: Count of CTNF codes
            - final_rejections: Count of CTFR codes
            - allowance: True if a NOA code is present
            - responses: Count of WRIT codes
            - abandonment: True if an ABND code is present
            - status: ``current_status`` from the status summary
            - pendency_days: ``pendency_days`` from the status summary
        """
        app_data = self.get_application(application_number)
        transactions = self._transactions_of(app_data)
        if not transactions:
            return {}

        analysis = {
            'total_office_actions': 0,
            'non_final_rejections': 0,
            'final_rejections': 0,
            'allowance': False,
            'responses': 0,
            'abandonment': False
        }

        for trans in transactions:
            code = trans.get('code', '')
            if code == 'CTNF':
                analysis['non_final_rejections'] += 1
                analysis['total_office_actions'] += 1
            elif code == 'CTFR':
                analysis['final_rejections'] += 1
                analysis['total_office_actions'] += 1
            elif code in ('AOPF', 'OA'):
                analysis['total_office_actions'] += 1
            elif code == 'NOA':
                analysis['allowance'] = True
            elif code == 'WRIT':
                analysis['responses'] += 1
            elif code == 'ABND':
                analysis['abandonment'] = True

        # Reuse the payload already in hand instead of fetching it again.
        summary = self._summarize(app_data)
        analysis['status'] = summary.get('current_status')
        analysis['pendency_days'] = summary.get('pendency_days')

        return analysis

    @staticmethod
    def _transactions_of(app_data: Optional[Dict]) -> List[Dict]:
        """Return the ``transactions`` list of *app_data*, or ``[]`` if absent."""
        if not app_data:
            return []
        # ``or []`` also covers a payload that carries ``transactions: None``.
        return app_data.get('transactions') or []

    @staticmethod
    def _summarize(app_data: Dict) -> Dict[str, Any]:
        """Build the status-summary dictionary from already-fetched data."""
        filing_date = app_data.get('filing_date')
        pendency_days = None
        if filing_date:
            try:
                filing_dt = datetime.strptime(filing_date, '%Y-%m-%d')
            except (TypeError, ValueError):
                # An unexpected date format should not sink the whole
                # summary; pendency is simply reported as unknown.
                pass
            else:
                pendency_days = (datetime.now() - filing_dt).days

        return {
            'current_status': app_data.get('app_status'),
            'filing_date': filing_date,
            'status_date': app_data.get('app_status_date'),
            'is_patented': app_data.get('patent_number') is not None,
            'patent_number': app_data.get('patent_number'),
            'issue_date': app_data.get('patent_issue_date'),
            'pendency_days': pendency_days,
            'title': app_data.get('title'),
            'inventors': app_data.get('inventors', []),
            'assignees': app_data.get('assignees', [])
        }

    def _format_application_data(self, raw_data: Dict) -> Dict:
        """Format raw PEDS data into cleaner structure."""
        # This is a placeholder - actual implementation depends on
        # the structure returned by uspto-opendata-python
        return raw_data
|
||||
|
||||
|
||||
def main():
    """Command-line interface for PEDS data.

    Dispatches on the first command-line argument:

    - ``--patent <patent_number>``: ``PEDSHelper.get_patent``
    - ``--status <application_number>``: ``PEDSHelper.get_status_summary``
    - ``--analyze <application_number>``: ``PEDSHelper.analyze_prosecution``
    - anything else is treated as an application number and passed to
      ``PEDSHelper.get_application``.

    A truthy result is printed to stdout as indented JSON. The process exits
    with status 1 when no arguments are given, when an option is missing its
    value, when the library is not installed, when the lookup yields nothing,
    or when it raises.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage:")
        print(" python peds_client.py <application_number>")
        print(" python peds_client.py --patent <patent_number>")
        print(" python peds_client.py --status <application_number>")
        print(" python peds_client.py --analyze <application_number>")
        sys.exit(1)

    option = cli_args[0]
    takes_value = option in ("--patent", "--status", "--analyze")

    # Validate before touching the library: a bare option used to fall
    # through to sys.argv[2] and surface as "list index out of range".
    if takes_value and len(cli_args) < 2:
        print(f"Error: {option} requires a value", file=sys.stderr)
        sys.exit(1)

    if not HAS_USPTO_LIB:
        # Diagnostics belong on stderr, like every other error in this CLI.
        print("Error: uspto-opendata-python library not installed", file=sys.stderr)
        print("Install with: pip install uspto-opendata-python", file=sys.stderr)
        sys.exit(1)

    helper = PEDSHelper()
    number = cli_args[1] if takes_value else option

    try:
        if option == "--patent":
            result = helper.get_patent(number)
        elif option == "--status":
            result = helper.get_status_summary(number)
        elif option == "--analyze":
            result = helper.analyze_prosecution(number)
        else:
            result = helper.get_application(number)

        if result:
            print(json.dumps(result, indent=2))
        else:
            # None and empty dicts both mean the lookup produced nothing.
            print("No data found", file=sys.stderr)
            sys.exit(1)

    except Exception as e:
        # Top-level CLI boundary; SystemExit from sys.exit() is not caught here.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script;
# importing the module only defines PEDSHelper and main().
if __name__ == "__main__":
    main()
|
||||
263
scientific-databases/uspto-database/scripts/trademark_client.py
Normal file
263
scientific-databases/uspto-database/scripts/trademark_client.py
Normal file
@@ -0,0 +1,263 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
USPTO Trademark API Helper
|
||||
|
||||
Provides functions for searching and retrieving trademark data using USPTO
|
||||
Trademark Status & Document Retrieval (TSDR) API.
|
||||
|
||||
Requires:
|
||||
- requests library: pip install requests
|
||||
- USPTO API key from https://account.uspto.gov/api-manager/
|
||||
|
||||
Environment variables:
|
||||
USPTO_API_KEY - Your USPTO API key
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import requests
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
|
||||
class TrademarkClient:
    """Client for USPTO Trademark APIs.

    Every lookup issues an HTTP GET against ``TSDR_BASE_URL`` with the API
    key sent in the ``X-Api-Key`` header. Methods that accept
    ``serial_or_registration`` try the value as a serial number first and
    fall back to treating it as a registration number.

    NOTE(review): USPTO's published TSDR examples appear to use a
    ``casestatus/<id>/info`` path and a ``USPTO-API-KEY`` header; confirm
    that the ``casedocs`` path and ``X-Api-Key`` header used here are
    accepted before relying on this client.
    """

    TSDR_BASE_URL = "https://tsdrapi.uspto.gov/ts/cd"
    ASSIGNMENT_BASE_URL = "https://assignment-api.uspto.gov/trademark"

    # Seconds to wait for the server when no timeout is configured.
    DEFAULT_TIMEOUT = 30.0

    # (substring of the upper-cased status, alert text); the first matching
    # entry wins, so order matters.
    _STATUS_ALERTS = (
        ('ABANDON', '⚠️ ABANDONED - Mark is no longer active'),
        ('CANCELLED', '⚠️ CANCELLED - Registration cancelled'),
        ('EXPIRED', '⚠️ EXPIRED - Registration has expired'),
        ('SUSPENDED', '⏸️ SUSPENDED - Examination suspended'),
        ('PUBLISHED', '📢 PUBLISHED - In opposition period'),
        ('REGISTERED', '✅ ACTIVE - Mark is registered and active'),
        ('PENDING', '⏳ PENDING - Application under examination'),
    )

    def __init__(self, api_key: Optional[str] = None, timeout: float = DEFAULT_TIMEOUT):
        """
        Initialize client with API key.

        Args:
            api_key: USPTO API key (if not provided, uses USPTO_API_KEY env var)
            timeout: Seconds to wait for each HTTP request before giving up

        Raises:
            ValueError: If no API key is passed and USPTO_API_KEY is unset
                or empty.
        """
        self.api_key = api_key or os.getenv("USPTO_API_KEY")
        if not self.api_key:
            raise ValueError("API key required. Set USPTO_API_KEY environment variable or pass to constructor.")

        self.headers = {"X-Api-Key": self.api_key}
        self.timeout = timeout

    def get_trademark_by_serial(self, serial_number: str) -> Optional[Dict]:
        """
        Get trademark information by serial number.

        Args:
            serial_number: Trademark serial number (e.g., "87654321")

        Returns:
            Trademark data dictionary or None if not found

        Raises:
            requests.exceptions.HTTPError: For any HTTP error other than 404.
        """
        return self._fetch_case(f"sn{serial_number}")

    def get_trademark_by_registration(self, registration_number: str) -> Optional[Dict]:
        """
        Get trademark information by registration number.

        Args:
            registration_number: Trademark registration number (e.g., "5678901")

        Returns:
            Trademark data dictionary or None if not found

        Raises:
            requests.exceptions.HTTPError: For any HTTP error other than 404.
        """
        return self._fetch_case(f"rn{registration_number}")

    def get_trademark_status(self, serial_or_registration: str) -> Dict[str, Any]:
        """
        Get current status summary for a trademark.

        Args:
            serial_or_registration: Serial or registration number

        Returns:
            Empty dict when the mark is not found, otherwise a dictionary
            built from the ``TradeMarkAppln`` entry with:
            - mark_text: Text of the mark
            - status: Current status
            - status_date: Date of the current status
            - filing_date: Application filing date
            - application_number: Application number
            - registration_number: Registration number if registered
            - registration_date: Registration date if registered
            - mark_drawing_code: Mark drawing code
            - is_registered: True when a registration number is present
        """
        tm = self._application_record(serial_or_registration)
        if tm is None:
            return {}

        return {
            'mark_text': tm.get('MarkVerbalElementText'),
            'status': tm.get('MarkCurrentStatusExternalDescriptionText'),
            'status_date': tm.get('MarkCurrentStatusDate'),
            'filing_date': tm.get('ApplicationDate'),
            'application_number': tm.get('ApplicationNumber'),
            'registration_number': tm.get('RegistrationNumber'),
            'registration_date': tm.get('RegistrationDate'),
            'mark_drawing_code': tm.get('MarkDrawingCode'),
            'is_registered': tm.get('RegistrationNumber') is not None
        }

    def get_goods_and_services(self, serial_or_registration: str) -> List[Dict]:
        """
        Get goods and services classification for a trademark.

        Args:
            serial_or_registration: Serial or registration number

        Returns:
            The ``GoodsAndServices`` entries, or an empty list when the mark
            is not found or has none.
        """
        tm = self._application_record(serial_or_registration)
        return [] if tm is None else tm.get('GoodsAndServices', [])

    def get_owner_info(self, serial_or_registration: str) -> List[Dict]:
        """
        Get owner/applicant information for a trademark.

        Args:
            serial_or_registration: Serial or registration number

        Returns:
            The ``Owners`` entries, or an empty list when the mark is not
            found or has none.
        """
        tm = self._application_record(serial_or_registration)
        return [] if tm is None else tm.get('Owners', [])

    def get_prosecution_history(self, serial_or_registration: str) -> List[Dict]:
        """
        Get prosecution history for a trademark.

        Args:
            serial_or_registration: Serial or registration number

        Returns:
            The ``ProsecutionHistoryEntry`` entries, or an empty list when
            the mark is not found or has none.
        """
        tm = self._application_record(serial_or_registration)
        return [] if tm is None else tm.get('ProsecutionHistoryEntry', [])

    def check_trademark_health(self, serial_or_registration: str) -> Dict[str, Any]:
        """
        Check trademark health and identify issues.

        Args:
            serial_or_registration: Serial or registration number

        Returns:
            ``{'error': 'Trademark not found'}`` when the mark cannot be
            retrieved, otherwise a dictionary with:
            - mark: Text of the mark
            - status: Upper-cased status text ('' when the record has none)
            - status_date: Date of the current status
            - alerts: At most one alert string chosen from the status text
            - needs_attention: True when an alert is a warning (abandoned,
              cancelled or expired)
        """
        status = self.get_trademark_status(serial_or_registration)
        if not status:
            return {'error': 'Trademark not found'}

        # get_trademark_status() always sets the 'status' key, so a record
        # without status text yields None here; a .get() default would not
        # apply and .upper() would raise AttributeError.
        current_status = (status.get('status') or '').upper()

        # Check for problematic statuses
        alerts = []
        for keyword, message in self._STATUS_ALERTS:
            if keyword in current_status:
                alerts.append(message)
                break

        return {
            'mark': status.get('mark_text'),
            'status': current_status,
            'status_date': status.get('status_date'),
            'alerts': alerts,
            'needs_attention': any('⚠️' in alert for alert in alerts)
        }

    def _fetch_case(self, case_id: str) -> Optional[Dict]:
        """GET ``casedocs/<case_id>/info.json``; return None on HTTP 404."""
        url = f"{self.TSDR_BASE_URL}/casedocs/{case_id}/info.json"

        try:
            # Without a timeout requests waits indefinitely on a stalled
            # connection. getattr keeps instances that were created without
            # running __init__ usable.
            response = requests.get(
                url,
                headers=self.headers,
                timeout=getattr(self, 'timeout', self.DEFAULT_TIMEOUT),
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 404:
                return None
            raise

    def _application_record(self, serial_or_registration: str) -> Optional[Dict]:
        """Return the ``TradeMarkAppln`` dict for a number, or None if not found."""
        # Try serial number first; if not found, try registration number.
        data = self.get_trademark_by_serial(serial_or_registration)
        if not data:
            data = self.get_trademark_by_registration(serial_or_registration)
        if not data:
            return None
        return data.get('TradeMarkAppln', {})
|
||||
|
||||
|
||||
def main():
    """Command-line interface for trademark search.

    Dispatches on the first command-line argument:

    - ``--status <number>``: ``TrademarkClient.get_trademark_status``
    - ``--health <number>``: ``TrademarkClient.check_trademark_health``
    - ``--goods <number>``: ``TrademarkClient.get_goods_and_services``
    - anything else is looked up as a serial number and, if that finds
      nothing, as a registration number.

    A truthy result is printed to stdout as indented JSON. The process exits
    with status 1 when no arguments are given, when an option is missing its
    value, when nothing is found, or when client construction or the lookup
    raises.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage:")
        print(" python trademark_client.py <serial_or_registration_number>")
        print(" python trademark_client.py --status <number>")
        print(" python trademark_client.py --health <number>")
        print(" python trademark_client.py --goods <number>")
        sys.exit(1)

    option = cli_args[0]
    takes_value = option in ("--status", "--health", "--goods")

    # A bare option used to fall through to sys.argv[2] and surface as
    # "list index out of range".
    if takes_value and len(cli_args) < 2:
        print(f"Error: {option} requires a value", file=sys.stderr)
        sys.exit(1)

    number = cli_args[1] if takes_value else option

    try:
        # Constructed inside the try so a missing API key (ValueError) is
        # reported as a one-line error instead of a traceback.
        client = TrademarkClient()

        if option == "--status":
            result = client.get_trademark_status(number)
        elif option == "--health":
            result = client.check_trademark_health(number)
        elif option == "--goods":
            result = client.get_goods_and_services(number)
        else:
            # Get full trademark data
            result = client.get_trademark_by_serial(number)
            if not result:
                result = client.get_trademark_by_registration(number)

        if result:
            print(json.dumps(result, indent=2))
        else:
            # Report the number that was looked up, not the option flag.
            print(f"Trademark {number} not found", file=sys.stderr)
            sys.exit(1)

    except Exception as e:
        # Top-level CLI boundary; SystemExit from sys.exit() is not caught here.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
# Run the command-line interface only when this file is executed as a script;
# importing the module only defines TrademarkClient and main().
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user