mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-01-26 16:58:56 +08:00
Add BRENDA database skill for enzyme research and analysis
- Add comprehensive BRENDA database skill with API integration
- Include enzyme data retrieval, pathway analysis, and visualization
- Support for enzyme queries, kinetic parameters, and taxonomy data
- Add visualization scripts for enzyme pathways and kinetics
This commit is contained in:
714
scientific-skills/brenda-database/SKILL.md
Normal file
714
scientific-skills/brenda-database/SKILL.md
Normal file
@@ -0,0 +1,714 @@
|
|||||||
|
---
|
||||||
|
name: brenda-database
|
||||||
|
description: "Access BRENDA enzyme database via SOAP API. Retrieve kinetic parameters (Km, kcat), reaction equations, organism data, and substrate-specific enzyme information for biochemical research and metabolic pathway analysis."
|
||||||
|
---
|
||||||
|
|
||||||
|
# BRENDA Database
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
BRENDA (BRaunschweig ENzyme DAtabase) is the world's most comprehensive enzyme information system, containing detailed enzyme data from scientific literature. Query kinetic parameters (Km, kcat), reaction equations, substrate specificities, organism information, and optimal conditions for enzymes using the official SOAP API. Access over 45,000 enzymes with millions of kinetic data points for biochemical research, metabolic engineering, and enzyme discovery.
|
||||||
|
|
||||||
|
## When to Use This Skill
|
||||||
|
|
||||||
|
This skill should be used when:
|
||||||
|
- Searching for enzyme kinetic parameters (Km, kcat, Vmax)
|
||||||
|
- Retrieving reaction equations and stoichiometry
|
||||||
|
- Finding enzymes for specific substrates or reactions
|
||||||
|
- Comparing enzyme properties across different organisms
|
||||||
|
- Investigating optimal pH, temperature, and conditions
|
||||||
|
- Accessing enzyme inhibition and activation data
|
||||||
|
- Supporting metabolic pathway reconstruction and retrosynthesis
|
||||||
|
- Performing enzyme engineering and optimization studies
|
||||||
|
- Analyzing substrate specificity and cofactor requirements
|
||||||
|
|
||||||
|
## Core Capabilities
|
||||||
|
|
||||||
|
### 1. Kinetic Parameter Retrieval
|
||||||
|
|
||||||
|
Access comprehensive kinetic data for enzymes:
|
||||||
|
|
||||||
|
**Get Km Values by EC Number**:
|
||||||
|
```python
|
||||||
|
from brenda_client import get_km_values
|
||||||
|
|
||||||
|
# Get Km values for all organisms
|
||||||
|
km_data = get_km_values("1.1.1.1") # Alcohol dehydrogenase
|
||||||
|
|
||||||
|
# Get Km values for specific organism
|
||||||
|
km_data = get_km_values("1.1.1.1", organism="Saccharomyces cerevisiae")
|
||||||
|
|
||||||
|
# Get Km values for specific substrate
|
||||||
|
km_data = get_km_values("1.1.1.1", substrate="ethanol")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Parse Km Results**:
|
||||||
|
```python
|
||||||
|
for entry in km_data:
|
||||||
|
print(f"Km: {entry}")
|
||||||
|
# Example output: "organism*Homo sapiens#substrate*ethanol#kmValue*1.2#commentary*"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Extract Specific Information**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import parse_km_entry, extract_organism_data
|
||||||
|
|
||||||
|
for entry in km_data:
|
||||||
|
parsed = parse_km_entry(entry)
|
||||||
|
organism = extract_organism_data(entry)
|
||||||
|
print(f"Organism: {parsed['organism']}")
|
||||||
|
print(f"Substrate: {parsed['substrate']}")
|
||||||
|
print(f"Km value: {parsed['km_value']}")
|
||||||
|
print(f"pH: {parsed.get('ph', 'N/A')}")
|
||||||
|
print(f"Temperature: {parsed.get('temperature', 'N/A')}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Reaction Information
|
||||||
|
|
||||||
|
Retrieve reaction equations and details:
|
||||||
|
|
||||||
|
**Get Reactions by EC Number**:
|
||||||
|
```python
|
||||||
|
from brenda_client import get_reactions
|
||||||
|
|
||||||
|
# Get all reactions for EC number
|
||||||
|
reactions = get_reactions("1.1.1.1")
|
||||||
|
|
||||||
|
# Filter by organism
|
||||||
|
reactions = get_reactions("1.1.1.1", organism="Escherichia coli")
|
||||||
|
|
||||||
|
# Search specific reaction
|
||||||
|
reactions = get_reactions("1.1.1.1", reaction="ethanol + NAD+")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Process Reaction Data**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import parse_reaction_entry, extract_substrate_products
|
||||||
|
|
||||||
|
for reaction in reactions:
|
||||||
|
parsed = parse_reaction_entry(reaction)
|
||||||
|
substrates, products = extract_substrate_products(reaction)
|
||||||
|
|
||||||
|
print(f"Reaction: {parsed['reaction']}")
|
||||||
|
print(f"Organism: {parsed['organism']}")
|
||||||
|
print(f"Substrates: {substrates}")
|
||||||
|
print(f"Products: {products}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Enzyme Discovery
|
||||||
|
|
||||||
|
Find enzymes for specific biochemical transformations:
|
||||||
|
|
||||||
|
**Find Enzymes by Substrate**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import search_enzymes_by_substrate
|
||||||
|
|
||||||
|
# Find enzymes that act on glucose
|
||||||
|
enzymes = search_enzymes_by_substrate("glucose", limit=20)
|
||||||
|
|
||||||
|
for enzyme in enzymes:
|
||||||
|
print(f"EC: {enzyme['ec_number']}")
|
||||||
|
print(f"Name: {enzyme['enzyme_name']}")
|
||||||
|
print(f"Reaction: {enzyme['reaction']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Find Enzymes by Product**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import search_enzymes_by_product
|
||||||
|
|
||||||
|
# Find enzymes that produce lactate
|
||||||
|
enzymes = search_enzymes_by_product("lactate", limit=10)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Search by Reaction Pattern**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import search_by_pattern
|
||||||
|
|
||||||
|
# Find oxidation reactions
|
||||||
|
enzymes = search_by_pattern("oxidation", limit=15)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Organism-Specific Enzyme Data
|
||||||
|
|
||||||
|
Compare enzyme properties across organisms:
|
||||||
|
|
||||||
|
**Get Enzyme Data for Multiple Organisms**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import compare_across_organisms
|
||||||
|
|
||||||
|
organisms = ["Escherichia coli", "Saccharomyces cerevisiae", "Homo sapiens"]
|
||||||
|
comparison = compare_across_organisms("1.1.1.1", organisms)
|
||||||
|
|
||||||
|
for org_data in comparison:
|
||||||
|
print(f"Organism: {org_data['organism']}")
|
||||||
|
print(f"Avg Km: {org_data['average_km']}")
|
||||||
|
print(f"Optimal pH: {org_data['optimal_ph']}")
|
||||||
|
print(f"Temperature range: {org_data['temperature_range']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Find Organisms with Specific Enzyme**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import get_organisms_for_enzyme
|
||||||
|
|
||||||
|
organisms = get_organisms_for_enzyme("6.3.5.5") # Carbamoyl-phosphate synthase (glutamine-hydrolysing)
|
||||||
|
print(f"Found {len(organisms)} organisms with this enzyme")
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Environmental Parameters
|
||||||
|
|
||||||
|
Access optimal conditions and environmental parameters:
|
||||||
|
|
||||||
|
**Get pH and Temperature Data**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import get_environmental_parameters
|
||||||
|
|
||||||
|
params = get_environmental_parameters("1.1.1.1")
|
||||||
|
|
||||||
|
print(f"Optimal pH range: {params['ph_range']}")
|
||||||
|
print(f"Optimal temperature: {params['optimal_temperature']}")
|
||||||
|
print(f"Stability pH: {params['stability_ph']}")
|
||||||
|
print(f"Temperature stability: {params['temperature_stability']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Cofactor Requirements**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import get_cofactor_requirements
|
||||||
|
|
||||||
|
cofactors = get_cofactor_requirements("1.1.1.1")
|
||||||
|
for cofactor in cofactors:
|
||||||
|
print(f"Cofactor: {cofactor['name']}")
|
||||||
|
print(f"Type: {cofactor['type']}")
|
||||||
|
print(f"Concentration: {cofactor['concentration']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. Substrate Specificity
|
||||||
|
|
||||||
|
Analyze enzyme substrate preferences:
|
||||||
|
|
||||||
|
**Get Substrate Specificity Data**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import get_substrate_specificity
|
||||||
|
|
||||||
|
specificity = get_substrate_specificity("1.1.1.1")
|
||||||
|
|
||||||
|
for substrate in specificity:
|
||||||
|
print(f"Substrate: {substrate['name']}")
|
||||||
|
print(f"Km: {substrate['km']}")
|
||||||
|
print(f"Vmax: {substrate['vmax']}")
|
||||||
|
print(f"kcat: {substrate['kcat']}")
|
||||||
|
print(f"Specificity constant: {substrate['kcat_km_ratio']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Compare Substrate Preferences**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import compare_substrate_affinity
|
||||||
|
|
||||||
|
comparison = compare_substrate_affinity("1.1.1.1")
|
||||||
|
sorted_by_km = sorted(comparison, key=lambda x: x['km'])
|
||||||
|
|
||||||
|
for substrate in sorted_by_km[:5]: # Top 5 lowest Km
|
||||||
|
print(f"{substrate['name']}: Km = {substrate['km']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7. Inhibition and Activation
|
||||||
|
|
||||||
|
Access enzyme regulation data:
|
||||||
|
|
||||||
|
**Get Inhibitor Information**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import get_inhibitors
|
||||||
|
|
||||||
|
inhibitors = get_inhibitors("1.1.1.1")
|
||||||
|
|
||||||
|
for inhibitor in inhibitors:
|
||||||
|
print(f"Inhibitor: {inhibitor['name']}")
|
||||||
|
print(f"Type: {inhibitor['type']}")
|
||||||
|
print(f"Ki: {inhibitor['ki']}")
|
||||||
|
print(f"IC50: {inhibitor['ic50']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Get Activator Information**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import get_activators
|
||||||
|
|
||||||
|
activators = get_activators("1.1.1.1")
|
||||||
|
|
||||||
|
for activator in activators:
|
||||||
|
print(f"Activator: {activator['name']}")
|
||||||
|
print(f"Effect: {activator['effect']}")
|
||||||
|
print(f"Mechanism: {activator['mechanism']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8. Enzyme Engineering Support
|
||||||
|
|
||||||
|
Find engineering targets and alternatives:
|
||||||
|
|
||||||
|
**Find Thermophilic Homologs**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import find_thermophilic_homologs
|
||||||
|
|
||||||
|
thermophilic = find_thermophilic_homologs("1.1.1.1", min_temp=50)
|
||||||
|
|
||||||
|
for enzyme in thermophilic:
|
||||||
|
print(f"Organism: {enzyme['organism']}")
|
||||||
|
print(f"Optimal temp: {enzyme['optimal_temperature']}")
|
||||||
|
print(f"Km: {enzyme['km']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Find Alkaline/Acid-Stable Variants**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import find_ph_stable_variants
|
||||||
|
|
||||||
|
alkaline = find_ph_stable_variants("1.1.1.1", min_ph=8.0)
|
||||||
|
acidic = find_ph_stable_variants("1.1.1.1", max_ph=6.0)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 9. Kinetic Modeling
|
||||||
|
|
||||||
|
Prepare data for kinetic modeling:
|
||||||
|
|
||||||
|
**Get Kinetic Parameters for Modeling**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import get_modeling_parameters
|
||||||
|
|
||||||
|
model_data = get_modeling_parameters("1.1.1.1", substrate="ethanol")
|
||||||
|
|
||||||
|
print(f"Km: {model_data['km']}")
|
||||||
|
print(f"Vmax: {model_data['vmax']}")
|
||||||
|
print(f"kcat: {model_data['kcat']}")
|
||||||
|
print(f"Enzyme concentration: {model_data['enzyme_conc']}")
|
||||||
|
print(f"Temperature: {model_data['temperature']}")
|
||||||
|
print(f"pH: {model_data['ph']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Generate Michaelis-Menten Plots**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_visualization import plot_michaelis_menten
|
||||||
|
|
||||||
|
# Generate kinetic plots
|
||||||
|
plot_michaelis_menten("1.1.1.1", substrate="ethanol")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Installation Requirements
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv pip install zeep requests pandas matplotlib seaborn
|
||||||
|
```
|
||||||
|
|
||||||
|
## Authentication Setup
|
||||||
|
|
||||||
|
BRENDA requires authentication credentials:
|
||||||
|
|
||||||
|
1. **Create .env file**:
|
||||||
|
```
|
||||||
|
BRENDA_EMAIL=your.email@example.com
|
||||||
|
BRENDA_PASSWORD=your_brenda_password
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Or set environment variables**:
|
||||||
|
```bash
|
||||||
|
export BRENDA_EMAIL="your.email@example.com"
|
||||||
|
export BRENDA_PASSWORD="your_brenda_password"
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Register for BRENDA access**:
|
||||||
|
- Visit https://www.brenda-enzymes.org/
|
||||||
|
- Create an account
|
||||||
|
- Check your email for credentials
|
||||||
|
- Note: The misspelled variable name `BRENDA_EMIAL` is also accepted for legacy support; prefer `BRENDA_EMAIL`
|
||||||
|
|
||||||
|
## Helper Scripts
|
||||||
|
|
||||||
|
This skill includes comprehensive Python scripts for BRENDA database queries:
|
||||||
|
|
||||||
|
### scripts/brenda_queries.py
|
||||||
|
|
||||||
|
Provides high-level functions for enzyme data analysis:
|
||||||
|
|
||||||
|
**Key Functions**:
|
||||||
|
- `parse_km_entry(entry)`: Parse BRENDA Km data entries
|
||||||
|
- `parse_reaction_entry(entry)`: Parse reaction data entries
|
||||||
|
- `extract_organism_data(entry)`: Extract organism-specific information
|
||||||
|
- `search_enzymes_by_substrate(substrate, limit)`: Find enzymes for substrates
|
||||||
|
- `search_enzymes_by_product(product, limit)`: Find enzymes producing products
|
||||||
|
- `compare_across_organisms(ec_number, organisms)`: Compare enzyme properties
|
||||||
|
- `get_environmental_parameters(ec_number)`: Get pH and temperature data
|
||||||
|
- `get_cofactor_requirements(ec_number)`: Get cofactor information
|
||||||
|
- `get_substrate_specificity(ec_number)`: Analyze substrate preferences
|
||||||
|
- `get_inhibitors(ec_number)`: Get enzyme inhibition data
|
||||||
|
- `get_activators(ec_number)`: Get enzyme activation data
|
||||||
|
- `find_thermophilic_homologs(ec_number, min_temp)`: Find heat-stable variants
|
||||||
|
- `get_modeling_parameters(ec_number, substrate)`: Get parameters for kinetic modeling
|
||||||
|
- `export_kinetic_data(ec_number, format, filename)`: Export data to file
|
||||||
|
|
||||||
|
**Usage**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import search_enzymes_by_substrate, compare_across_organisms
|
||||||
|
|
||||||
|
# Search for enzymes
|
||||||
|
enzymes = search_enzymes_by_substrate("glucose", limit=20)
|
||||||
|
|
||||||
|
# Compare across organisms
|
||||||
|
comparison = compare_across_organisms("1.1.1.1", ["E. coli", "S. cerevisiae"])
|
||||||
|
```
|
||||||
|
|
||||||
|
### scripts/brenda_visualization.py
|
||||||
|
|
||||||
|
Provides visualization functions for enzyme data:
|
||||||
|
|
||||||
|
**Key Functions**:
|
||||||
|
- `plot_kinetic_parameters(ec_number)`: Plot Km and kcat distributions
|
||||||
|
- `plot_organism_comparison(ec_number, organisms)`: Compare organisms
|
||||||
|
- `plot_pH_profiles(ec_number)`: Plot pH activity profiles
|
||||||
|
- `plot_temperature_profiles(ec_number)`: Plot temperature activity profiles
|
||||||
|
- `plot_substrate_specificity(ec_number)`: Visualize substrate preferences
|
||||||
|
- `plot_michaelis_menten(ec_number, substrate)`: Generate kinetic curves
|
||||||
|
- `create_heatmap_data(enzymes, parameters)`: Create data for heatmaps
|
||||||
|
- `generate_summary_plots(ec_number)`: Create comprehensive enzyme overview
|
||||||
|
|
||||||
|
**Usage**:
|
||||||
|
```python
|
||||||
|
from scripts.brenda_visualization import plot_kinetic_parameters, plot_michaelis_menten
|
||||||
|
|
||||||
|
# Plot kinetic parameters
|
||||||
|
plot_kinetic_parameters("1.1.1.1")
|
||||||
|
|
||||||
|
# Generate Michaelis-Menten curve
|
||||||
|
plot_michaelis_menten("1.1.1.1", substrate="ethanol")
|
||||||
|
```
|
||||||
|
|
||||||
|
### scripts/enzyme_pathway_builder.py
|
||||||
|
|
||||||
|
Build enzymatic pathways and retrosynthetic routes:
|
||||||
|
|
||||||
|
**Key Functions**:
|
||||||
|
- `find_pathway_for_product(product, max_steps)`: Find enzymatic pathways
|
||||||
|
- `build_retrosynthetic_tree(target, depth)`: Build retrosynthetic tree
|
||||||
|
- `suggest_enzyme_substitutions(ec_number, criteria)`: Suggest enzyme alternatives
|
||||||
|
- `calculate_pathway_feasibility(pathway)`: Evaluate pathway viability
|
||||||
|
- `optimize_pathway_conditions(pathway)`: Suggest optimal conditions
|
||||||
|
- `generate_pathway_report(pathway, filename)`: Create detailed pathway report
|
||||||
|
|
||||||
|
**Usage**:
|
||||||
|
```python
|
||||||
|
from scripts.enzyme_pathway_builder import find_pathway_for_product, build_retrosynthetic_tree
|
||||||
|
|
||||||
|
# Find pathway to product
|
||||||
|
pathway = find_pathway_for_product("lactate", max_steps=3)
|
||||||
|
|
||||||
|
# Build retrosynthetic tree
|
||||||
|
tree = build_retrosynthetic_tree("lactate", depth=2)
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Rate Limits and Best Practices
|
||||||
|
|
||||||
|
**Rate Limits**:
|
||||||
|
- BRENDA API has moderate rate limiting
|
||||||
|
- Recommended: 1 request per second for sustained usage
|
||||||
|
- Maximum: 5 requests per 10 seconds
|
||||||
|
|
||||||
|
**Best Practices**:
|
||||||
|
1. **Cache results**: Store frequently accessed enzyme data locally
|
||||||
|
2. **Batch queries**: Combine related requests when possible
|
||||||
|
3. **Use specific searches**: Narrow down by organism, substrate when possible
|
||||||
|
4. **Handle missing data**: Not all enzymes have complete data
|
||||||
|
5. **Validate EC numbers**: Ensure EC numbers are in correct format
|
||||||
|
6. **Implement delays**: Add delays between consecutive requests
|
||||||
|
7. **Use wildcards wisely**: Use '*' for broader searches when appropriate
|
||||||
|
8. **Monitor quota**: Track your API usage
|
||||||
|
|
||||||
|
**Error Handling**:
|
||||||
|
```python
|
||||||
|
from brenda_client import get_km_values, get_reactions
|
||||||
|
from zeep.exceptions import Fault, TransportError
|
||||||
|
|
||||||
|
try:
|
||||||
|
km_data = get_km_values("1.1.1.1")
|
||||||
|
except RuntimeError as e:
|
||||||
|
print(f"Authentication error: {e}")
|
||||||
|
except Fault as e:
|
||||||
|
print(f"BRENDA API error: {e}")
|
||||||
|
except TransportError as e:
|
||||||
|
print(f"Network error: {e}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Unexpected error: {e}")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Common Workflows
|
||||||
|
|
||||||
|
### Workflow 1: Enzyme Discovery for New Substrate
|
||||||
|
|
||||||
|
Find suitable enzymes for a specific substrate:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from brenda_client import get_km_values
|
||||||
|
from scripts.brenda_queries import search_enzymes_by_substrate, compare_substrate_affinity
|
||||||
|
|
||||||
|
# Search for enzymes that act on substrate
|
||||||
|
substrate = "2-phenylethanol"
|
||||||
|
enzymes = search_enzymes_by_substrate(substrate, limit=15)
|
||||||
|
|
||||||
|
print(f"Found {len(enzymes)} enzymes for {substrate}")
|
||||||
|
for enzyme in enzymes:
|
||||||
|
print(f"EC {enzyme['ec_number']}: {enzyme['enzyme_name']}")
|
||||||
|
|
||||||
|
# Get kinetic data for best candidates
|
||||||
|
if enzymes:
|
||||||
|
best_ec = enzymes[0]['ec_number']
|
||||||
|
km_data = get_km_values(best_ec, substrate=substrate)
|
||||||
|
|
||||||
|
if km_data:
|
||||||
|
print(f"Kinetic data for {best_ec}:")
|
||||||
|
for entry in km_data[:3]: # First 3 entries
|
||||||
|
print(f" {entry}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Workflow 2: Cross-Organism Enzyme Comparison
|
||||||
|
|
||||||
|
Compare enzyme properties across different organisms:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import compare_across_organisms, get_environmental_parameters
|
||||||
|
|
||||||
|
# Define organisms for comparison
|
||||||
|
organisms = [
|
||||||
|
"Escherichia coli",
|
||||||
|
"Saccharomyces cerevisiae",
|
||||||
|
"Bacillus subtilis",
|
||||||
|
"Thermus thermophilus"
|
||||||
|
]
|
||||||
|
|
||||||
|
# Compare alcohol dehydrogenase
|
||||||
|
comparison = compare_across_organisms("1.1.1.1", organisms)
|
||||||
|
|
||||||
|
print("Cross-organism comparison:")
|
||||||
|
for org_data in comparison:
|
||||||
|
print(f"\n{org_data['organism']}:")
|
||||||
|
print(f" Average Km: {org_data['average_km']}")
|
||||||
|
print(f" Optimal pH: {org_data['optimal_ph']}")
|
||||||
|
print(f" Temperature: {org_data['optimal_temperature']}°C")
|
||||||
|
|
||||||
|
# Get detailed environmental parameters
|
||||||
|
env_params = get_environmental_parameters("1.1.1.1")
|
||||||
|
print(f"\nOverall optimal pH range: {env_params['ph_range']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Workflow 3: Enzyme Engineering Target Identification
|
||||||
|
|
||||||
|
Find engineering opportunities for enzyme improvement:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import (
|
||||||
|
find_thermophilic_homologs,
|
||||||
|
find_ph_stable_variants,
|
||||||
|
compare_substrate_affinity
|
||||||
|
)
|
||||||
|
|
||||||
|
# Find thermophilic variants for heat stability
|
||||||
|
thermophilic = find_thermophilic_homologs("1.1.1.1", min_temp=50)
|
||||||
|
print(f"Found {len(thermophilic)} thermophilic variants")
|
||||||
|
|
||||||
|
# Find alkaline-stable variants
|
||||||
|
alkaline = find_ph_stable_variants("1.1.1.1", min_ph=8.0)
|
||||||
|
print(f"Found {len(alkaline)} alkaline-stable variants")
|
||||||
|
|
||||||
|
# Compare substrate specificities for engineering targets
|
||||||
|
specificity = compare_substrate_affinity("1.1.1.1")
|
||||||
|
print("Substrate affinity ranking:")
|
||||||
|
for i, sub in enumerate(specificity[:5]):
|
||||||
|
print(f" {i+1}. {sub['name']}: Km = {sub['km']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Workflow 4: Enzymatic Pathway Construction
|
||||||
|
|
||||||
|
Build enzymatic synthesis pathways:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from scripts.enzyme_pathway_builder import (
|
||||||
|
find_pathway_for_product,
|
||||||
|
build_retrosynthetic_tree,
|
||||||
|
calculate_pathway_feasibility
|
||||||
|
)
|
||||||
|
|
||||||
|
# Find pathway to target product
|
||||||
|
target = "lactate"
|
||||||
|
pathway = find_pathway_for_product(target, max_steps=3)
|
||||||
|
|
||||||
|
if pathway:
|
||||||
|
print(f"Found pathway to {target}:")
|
||||||
|
for i, step in enumerate(pathway['steps']):
|
||||||
|
print(f" Step {i+1}: {step['reaction']}")
|
||||||
|
print(f" Enzyme: EC {step['ec_number']}")
|
||||||
|
print(f" Organism: {step['organism']}")
|
||||||
|
|
||||||
|
# Evaluate pathway feasibility
|
||||||
|
feasibility = calculate_pathway_feasibility(pathway)
|
||||||
|
print(f"\nPathway feasibility score: {feasibility['score']}/10")
|
||||||
|
print(f"Potential issues: {feasibility['warnings']}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Workflow 5: Kinetic Parameter Analysis
|
||||||
|
|
||||||
|
Comprehensive kinetic analysis for enzyme selection:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from brenda_client import get_km_values
|
||||||
|
from scripts.brenda_queries import parse_km_entry, get_modeling_parameters
|
||||||
|
from scripts.brenda_visualization import plot_kinetic_parameters
|
||||||
|
|
||||||
|
# Get comprehensive kinetic data
|
||||||
|
ec_number = "1.1.1.1"
|
||||||
|
km_data = get_km_values(ec_number)
|
||||||
|
|
||||||
|
# Analyze kinetic parameters
|
||||||
|
all_entries = []
|
||||||
|
for entry in km_data:
|
||||||
|
parsed = parse_km_entry(entry)
|
||||||
|
if parsed['km_value']:
|
||||||
|
all_entries.append(parsed)
|
||||||
|
|
||||||
|
print(f"Analyzed {len(all_entries)} kinetic entries")
|
||||||
|
|
||||||
|
# Find best kinetic performer
|
||||||
|
best_km = min(all_entries, key=lambda x: x['km_value'])
|
||||||
|
print(f"\nBest kinetic performer:")
|
||||||
|
print(f" Organism: {best_km['organism']}")
|
||||||
|
print(f" Substrate: {best_km['substrate']}")
|
||||||
|
print(f" Km: {best_km['km_value']}")
|
||||||
|
|
||||||
|
# Get modeling parameters
|
||||||
|
model_data = get_modeling_parameters(ec_number, substrate=best_km['substrate'])
|
||||||
|
print(f"\nModeling parameters:")
|
||||||
|
print(f" Km: {model_data['km']}")
|
||||||
|
print(f" kcat: {model_data['kcat']}")
|
||||||
|
print(f" Vmax: {model_data['vmax']}")
|
||||||
|
|
||||||
|
# Generate visualization
|
||||||
|
plot_kinetic_parameters(ec_number)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Workflow 6: Industrial Enzyme Selection
|
||||||
|
|
||||||
|
Select enzymes for industrial applications:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import (
|
||||||
|
find_thermophilic_homologs,
|
||||||
|
get_environmental_parameters,
|
||||||
|
get_inhibitors
|
||||||
|
)
|
||||||
|
|
||||||
|
# Industrial criteria: high temperature tolerance, organic solvent resistance
|
||||||
|
target_enzyme = "1.1.1.1"
|
||||||
|
|
||||||
|
# Find thermophilic variants
|
||||||
|
thermophilic = find_thermophilic_homologs(target_enzyme, min_temp=60)
|
||||||
|
print(f"Thermophilic candidates: {len(thermophilic)}")
|
||||||
|
|
||||||
|
# Check solvent tolerance (inhibitor data)
|
||||||
|
inhibitors = get_inhibitors(target_enzyme)
|
||||||
|
solvent_tolerant = [
|
||||||
|
inv for inv in inhibitors
|
||||||
|
if 'ethanol' not in inv['name'].lower() and
|
||||||
|
'methanol' not in inv['name'].lower()
|
||||||
|
]
|
||||||
|
|
||||||
|
print(f"Solvent tolerant candidates: {len(solvent_tolerant)}")
|
||||||
|
|
||||||
|
# Evaluate top candidates
|
||||||
|
for candidate in thermophilic[:3]:
|
||||||
|
print(f"\nCandidate: {candidate['organism']}")
|
||||||
|
print(f" Optimal temp: {candidate['optimal_temperature']}°C")
|
||||||
|
print(f" Km: {candidate['km']}")
|
||||||
|
print(f" pH range: {candidate.get('ph_range', 'N/A')}")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Data Formats and Parsing
|
||||||
|
|
||||||
|
### BRENDA Response Format
|
||||||
|
|
||||||
|
BRENDA returns data in specific formats that need parsing:
|
||||||
|
|
||||||
|
**Km Value Format**:
|
||||||
|
```
|
||||||
|
organism*Escherichia coli#substrate*ethanol#kmValue*1.2#kmValueMaximum*#commentary*pH 7.4, 25°C#ligandStructureId*#literature*
|
||||||
|
```
|
||||||
|
|
||||||
|
**Reaction Format**:
|
||||||
|
```
|
||||||
|
ecNumber*1.1.1.1#organism*Saccharomyces cerevisiae#reaction*ethanol + NAD+ <=> acetaldehyde + NADH + H+#commentary*#literature*
|
||||||
|
```
|
||||||
|
|
||||||
|
### Data Extraction Patterns
|
||||||
|
|
||||||
|
```python
|
||||||
|
import re
|
||||||
|
|
||||||
|
def parse_brenda_field(data, field_name):
|
||||||
|
"""Extract specific field from BRENDA data entry"""
|
||||||
|
pattern = f"{field_name}\\*([^#]*)"
|
||||||
|
match = re.search(pattern, data)
|
||||||
|
return match.group(1) if match else None
|
||||||
|
|
||||||
|
def extract_multiple_values(data, field_name):
|
||||||
|
"""Extract multiple values for a field"""
|
||||||
|
pattern = f"{field_name}\\*([^#]*)"
|
||||||
|
matches = re.findall(pattern, data)
|
||||||
|
return [match for match in matches if match.strip()]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Reference Documentation
|
||||||
|
|
||||||
|
For detailed BRENDA documentation, see `references/api_reference.md`. This includes:
|
||||||
|
- Complete SOAP API method documentation
|
||||||
|
- Full parameter lists and formats
|
||||||
|
- EC number structure and validation
|
||||||
|
- Response format specifications
|
||||||
|
- Error codes and handling
|
||||||
|
- Data field definitions
|
||||||
|
- Literature citation formats
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
**Authentication Errors**:
|
||||||
|
- Verify BRENDA_EMAIL and BRENDA_PASSWORD in .env file
|
||||||
|
- Check that the variable names are spelled correctly (the misspelled `BRENDA_EMIAL` is also accepted for legacy support)
|
||||||
|
- Ensure BRENDA account is active and has API access
|
||||||
|
|
||||||
|
**No Results Returned**:
|
||||||
|
- Try broader searches with wildcards (*)
|
||||||
|
- Check EC number format (e.g., "1.1.1.1" not "1.1.1")
|
||||||
|
- Verify substrate spelling and naming
|
||||||
|
- Some enzymes may have limited data in BRENDA
|
||||||
|
|
||||||
|
**Rate Limiting**:
|
||||||
|
- Add delays between requests (0.5-1 second)
|
||||||
|
- Cache results locally
|
||||||
|
- Use more specific queries to reduce data volume
|
||||||
|
- Consider batch operations for multiple queries
|
||||||
|
|
||||||
|
**Network Errors**:
|
||||||
|
- Check internet connection
|
||||||
|
- BRENDA server may be temporarily unavailable
|
||||||
|
- Try again after a few minutes
|
||||||
|
- Consider using VPN if geo-restricted
|
||||||
|
|
||||||
|
**Data Format Issues**:
|
||||||
|
- Use the provided parsing functions in scripts
|
||||||
|
- BRENDA data can be inconsistent in formatting
|
||||||
|
- Handle missing fields gracefully
|
||||||
|
- Validate parsed data before use
|
||||||
|
|
||||||
|
**Performance Issues**:
|
||||||
|
- Large queries can be slow; limit search scope
|
||||||
|
- Use specific organism or substrate filters
|
||||||
|
- Consider asynchronous processing for batch operations
|
||||||
|
- Monitor memory usage with large datasets
|
||||||
|
|
||||||
|
## Additional Resources
|
||||||
|
|
||||||
|
- BRENDA Home: https://www.brenda-enzymes.org/
|
||||||
|
- BRENDA SOAP API Documentation: https://www.brenda-enzymes.org/soap.php
|
||||||
|
- Enzyme Commission (EC) Numbers: https://www.qmul.ac.uk/sbcs/iubmb/enzyme/
|
||||||
|
- Zeep SOAP Client: https://python-zeep.readthedocs.io/
|
||||||
|
- Enzyme Nomenclature: https://www.iubmb.org/enzyme/
|
||||||
537
scientific-skills/brenda-database/references/api_reference.md
Normal file
537
scientific-skills/brenda-database/references/api_reference.md
Normal file
@@ -0,0 +1,537 @@
|
|||||||
|
# BRENDA Database API Reference
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This document provides detailed reference information for the BRENDA (BRaunschweig ENzyme DAtabase) SOAP API and the Python client implementation. BRENDA is the world's most comprehensive enzyme information system, containing over 45,000 enzymes with millions of kinetic data points.
|
||||||
|
|
||||||
|
## SOAP API Endpoints
|
||||||
|
|
||||||
|
### Base WSDL URL
|
||||||
|
```
|
||||||
|
https://www.brenda-enzymes.org/soap/brenda_zeep.wsdl
|
||||||
|
```
|
||||||
|
|
||||||
|
### Authentication
|
||||||
|
|
||||||
|
All BRENDA API calls require authentication using email and password:
|
||||||
|
|
||||||
|
**Parameters:**
|
||||||
|
- `email`: Your registered BRENDA email address
|
||||||
|
- `password`: Your BRENDA account password
|
||||||
|
|
||||||
|
**Authentication Process:**
|
||||||
|
1. Password is hashed using SHA-256 before transmission
|
||||||
|
2. Email and hashed password are included as the first two parameters in every API call
|
||||||
|
3. Legacy support for `BRENDA_EMIAL` environment variable (note the typo)
|
||||||
|
|
||||||
|
## Available SOAP Actions
|
||||||
|
|
||||||
|
### getKmValue
|
||||||
|
|
||||||
|
Retrieves Michaelis constant (Km) values for enzymes.
|
||||||
|
|
||||||
|
**Parameters:**
|
||||||
|
1. `email`: BRENDA account email
|
||||||
|
2. `passwordHash`: SHA-256 hashed password
|
||||||
|
3. `ecNumber*`: EC number of the enzyme (wildcards allowed)
|
||||||
|
4. `organism*`: Organism name (wildcards allowed, default: "*")
|
||||||
|
5. `kmValue*`: Km value field (default: "*")
|
||||||
|
6. `kmValueMaximum*`: Maximum Km value field (default: "*")
|
||||||
|
7. `substrate*`: Substrate name (wildcards allowed, default: "*")
|
||||||
|
8. `commentary*`: Commentary field (default: "*")
|
||||||
|
9. `ligandStructureId*`: Ligand structure ID field (default: "*")
|
||||||
|
10. `literature*`: Literature reference field (default: "*")
|
||||||
|
|
||||||
|
**Wildcards:**
|
||||||
|
- `*`: Matches any sequence
|
||||||
|
- Can be used with partial EC numbers (e.g., "1.1.*")
|
||||||
|
|
||||||
|
**Response Format:**
|
||||||
|
```
|
||||||
|
organism*Escherichia coli#substrate*glucose#kmValue*0.12#kmValueMaximum*#commentary*pH 7.4, 25°C#ligandStructureId*#literature*
|
||||||
|
```
|
||||||
|
|
||||||
|
**Example Response Fields:**
|
||||||
|
- `organism`: Source organism
|
||||||
|
- `substrate`: Substrate name
|
||||||
|
- `kmValue`: Michaelis constant value (typically in mM)
|
||||||
|
- `kmValueMaximum`: Maximum Km value (if available)
|
||||||
|
- `commentary`: Experimental conditions (pH, temperature, etc.)
|
||||||
|
- `ligandStructureId`: BRENDA ligand structure identifier
|
||||||
|
- `literature`: Reference to primary literature
|
||||||
|
|
||||||
|
### getReaction
|
||||||
|
|
||||||
|
Retrieves reaction equations and stoichiometry for enzymes.
|
||||||
|
|
||||||
|
**Parameters:**
|
||||||
|
1. `email`: BRENDA account email
|
||||||
|
2. `passwordHash`: SHA-256 hashed password
|
||||||
|
3. `ecNumber*`: EC number of the enzyme (wildcards allowed)
|
||||||
|
4. `organism*`: Organism name (wildcards allowed, default: "*")
|
||||||
|
5. `reaction*`: Reaction equation (wildcards allowed, default: "*")
|
||||||
|
6. `commentary*`: Commentary field (default: "*")
|
||||||
|
7. `literature*`: Literature reference field (default: "*")
|
||||||
|
|
||||||
|
**Response Format:**
|
||||||
|
```
|
||||||
|
ecNumber*1.1.1.1#organism*Saccharomyces cerevisiae#reaction*ethanol + NAD+ <=> acetaldehyde + NADH + H+#commentary*#literature*
|
||||||
|
```
|
||||||
|
|
||||||
|
**Example Response Fields:**
|
||||||
|
- `ecNumber`: Enzyme Commission number
|
||||||
|
- `organism`: Source organism
|
||||||
|
- `reaction`: Balanced chemical equation (using <=> for equilibrium, -> for direction)
|
||||||
|
- `commentary`: Additional information
|
||||||
|
- `literature`: Reference citation
|
||||||
|
|
||||||
|
## Data Field Specifications
|
||||||
|
|
||||||
|
### EC Number Format
|
||||||
|
|
||||||
|
EC numbers follow the standard hierarchical format: `A.B.C.D`
|
||||||
|
|
||||||
|
- **A**: Main class (1-7; class 7, Translocases, was added to the EC system in 2018)
|
||||||
|
- 1: Oxidoreductases
|
||||||
|
- 2: Transferases
|
||||||
|
- 3: Hydrolases
|
||||||
|
- 4: Lyases
|
||||||
|
- 5: Isomerases
|
||||||
|
- 6: Ligases
|
||||||
|
- **B**: Subclass
|
||||||
|
- **C**: Sub-subclass
|
||||||
|
- **D**: Serial number
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
- `1.1.1.1`: Alcohol dehydrogenase
|
||||||
|
- `1.1.1.2`: Alcohol dehydrogenase (NADP+)
|
||||||
|
- `3.2.1.23`: Beta-galactosidase
|
||||||
|
- `2.7.1.1`: Hexokinase
|
||||||
|
|
||||||
|
### Organism Names
|
||||||
|
|
||||||
|
Organism names should use proper binomial nomenclature:
|
||||||
|
|
||||||
|
**Correct Format:**
|
||||||
|
- `Escherichia coli`
|
||||||
|
- `Saccharomyces cerevisiae`
|
||||||
|
- `Homo sapiens`
|
||||||
|
|
||||||
|
**Wildcards:**
|
||||||
|
- `Escherichia*`: Matches all organisms whose name starts with "Escherichia" (every species and strain in the genus, including E. coli)
|
||||||
|
- `*coli`: Matches all organisms whose name ends in "coli"
|
||||||
|
- `*`: Matches all organisms
|
||||||
|
|
||||||
|
### Substrate Names
|
||||||
|
|
||||||
|
Substrate names follow IUPAC or common biochemical conventions:
|
||||||
|
|
||||||
|
**Common Formats:**
|
||||||
|
- Chemical names: `glucose`, `ethanol`, `pyruvate`
|
||||||
|
- IUPAC names: `β-D-glucose`, `ethanol`, `2-oxopropanoic acid`
|
||||||
|
- Abbreviations: `ATP`, `NAD+`, `CoA`
|
||||||
|
|
||||||
|
**Special Cases:**
|
||||||
|
- Cofactors: `NAD+`, `NADH`, `NADP+`, `NADPH`
|
||||||
|
- Metal ions: `Mg2+`, `Zn2+`, `Fe2+`
|
||||||
|
- Inorganic compounds: `H2O`, `CO2`, `O2`
|
||||||
|
|
||||||
|
### Commentary Field Format
|
||||||
|
|
||||||
|
Commentary fields contain experimental conditions and other metadata:
|
||||||
|
|
||||||
|
**Common Information:**
|
||||||
|
- **pH**: `pH 7.4`, `pH 6.5-8.0`
|
||||||
|
- **Temperature**: `25°C`, `37°C`, `50-60°C`
|
||||||
|
- **Buffer systems**: `phosphate buffer`, `Tris-HCl`
|
||||||
|
- **Purity**: `purified enzyme`, `crude extract`
|
||||||
|
- **Assay conditions**: `spectrophotometric`, `radioactive`
|
||||||
|
- **Inhibition**: `inhibited by heavy metals`, `activated by Mg2+`
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
- `pH 7.4, 25°C, phosphate buffer`
|
||||||
|
- `pH 6.5-8.0 optimum, thermostable enzyme`
|
||||||
|
- `purified enzyme, specific activity 125 U/mg`
|
||||||
|
- `inhibited by iodoacetate, activated by Mn2+`
|
||||||
|
|
||||||
|
### Reaction Equation Format
|
||||||
|
|
||||||
|
Reactions use standard biochemical notation:
|
||||||
|
|
||||||
|
**Symbols:**
|
||||||
|
- `+`: Separate reactants/products
|
||||||
|
- `<=>`: Reversible reactions
|
||||||
|
- `->`: Irreversible (directional) reactions
|
||||||
|
- `=`: Alternative notation for reactions
|
||||||
|
|
||||||
|
**Common Patterns:**
|
||||||
|
- **Oxidation/reduction**: `alcohol + NAD+ <=> aldehyde + NADH + H+`
|
||||||
|
- **Phosphorylation**: `glucose + ATP <=> glucose-6-phosphate + ADP`
|
||||||
|
- **Hydrolysis**: `ester + H2O <=> acid + alcohol`
|
||||||
|
- **Carboxylation**: `acetyl-CoA + CO2 + H2O <=> malonyl-CoA`
|
||||||
|
|
||||||
|
**Cofactor Requirements:**
|
||||||
|
- **Oxidoreductases**: NAD+, NADH, NADP+, NADPH, FAD, FADH2
|
||||||
|
- **Transferases**: ATP, ADP, GTP, GDP
|
||||||
|
- **Ligases**: ATP, CoA
|
||||||
|
|
||||||
|
## Rate Limiting and Usage
|
||||||
|
|
||||||
|
### API Rate Limits
|
||||||
|
|
||||||
|
- **Maximum**: 5 requests per second
|
||||||
|
- **Sustained**: 1 request per second recommended
|
||||||
|
- **Daily quota**: Varies by account type
|
||||||
|
|
||||||
|
### Best Practices
|
||||||
|
|
||||||
|
1. **Implement delays**: Add 0.5-1 second between requests
|
||||||
|
2. **Cache results**: Store frequently accessed data locally
|
||||||
|
3. **Use specific searches**: Narrow by organism and substrate when possible
|
||||||
|
4. **Batch operations**: Group related queries
|
||||||
|
5. **Handle errors gracefully**: Check for HTTP and SOAP errors
|
||||||
|
6. **Use wildcards judiciously**: Broad searches return large datasets
|
||||||
|
|
||||||
|
### Error Handling
|
||||||
|
|
||||||
|
**Common SOAP Errors:**
|
||||||
|
- `Authentication failed`: Check email/password
|
||||||
|
- `No data found`: Verify EC number, organism, substrate spelling
|
||||||
|
- `Rate limit exceeded`: Reduce request frequency
|
||||||
|
- `Invalid parameters`: Check parameter format and order
|
||||||
|
|
||||||
|
**Network Errors:**
|
||||||
|
- Connection timeouts
|
||||||
|
- SSL/TLS errors
|
||||||
|
- Service unavailable
|
||||||
|
|
||||||
|
## Python Client Reference
|
||||||
|
|
||||||
|
### brenda_client Module
|
||||||
|
|
||||||
|
#### Core Functions
|
||||||
|
|
||||||
|
**`load_env_from_file(path=".env")`**
|
||||||
|
- **Purpose**: Load environment variables from .env file
|
||||||
|
- **Parameters**: `path` - Path to .env file (default: ".env")
|
||||||
|
- **Returns**: None (populates os.environ)
|
||||||
|
|
||||||
|
**`_get_credentials() -> tuple[str, str]`**
|
||||||
|
- **Purpose**: Retrieve BRENDA credentials from environment
|
||||||
|
- **Returns**: Tuple of (email, password)
|
||||||
|
- **Raises**: RuntimeError if credentials missing
|
||||||
|
|
||||||
|
**`_get_client() -> Client`**
|
||||||
|
- **Purpose**: Initialize or retrieve SOAP client
|
||||||
|
- **Returns**: Zeep Client instance
|
||||||
|
- **Features**: Singleton pattern, custom transport settings
|
||||||
|
|
||||||
|
**`_hash_password(password: str) -> str`**
|
||||||
|
- **Purpose**: Generate SHA-256 hash of password
|
||||||
|
- **Parameters**: `password` - Plain text password
|
||||||
|
- **Returns**: Hexadecimal SHA-256 hash
|
||||||
|
|
||||||
|
**`call_brenda(action: str, parameters: List[str]) -> str`**
|
||||||
|
- **Purpose**: Execute BRENDA SOAP action
|
||||||
|
- **Parameters**:
|
||||||
|
- `action` - SOAP action name (e.g., "getKmValue")
|
||||||
|
- `parameters` - List of parameters in correct order
|
||||||
|
- **Returns**: Raw response string from BRENDA
|
||||||
|
|
||||||
|
#### Convenience Functions
|
||||||
|
|
||||||
|
**`get_km_values(ec_number: str, organism: str = "*", substrate: str = "*") -> List[str]`**
|
||||||
|
- **Purpose**: Retrieve Km values for specified enzyme
|
||||||
|
- **Parameters**:
|
||||||
|
- `ec_number`: Enzyme Commission number
|
||||||
|
- `organism`: Organism name (wildcard allowed, default: "*")
|
||||||
|
- `substrate`: Substrate name (wildcard allowed, default: "*")
|
||||||
|
- **Returns**: List of parsed data strings
|
||||||
|
|
||||||
|
**`get_reactions(ec_number: str, organism: str = "*", reaction: str = "*") -> List[str]`**
|
||||||
|
- **Purpose**: Retrieve reaction data for specified enzyme
|
||||||
|
- **Parameters**:
|
||||||
|
- `ec_number`: Enzyme Commission number
|
||||||
|
- `organism`: Organism name (wildcard allowed, default: "*")
|
||||||
|
- `reaction`: Reaction pattern (wildcard allowed, default: "*")
|
||||||
|
- **Returns**: List of reaction data strings
|
||||||
|
|
||||||
|
#### Utility Functions
|
||||||
|
|
||||||
|
**`split_entries(return_text: str) -> List[str]`**
|
||||||
|
- **Purpose**: Normalize BRENDA responses to list format
|
||||||
|
- **Parameters**: `return_text` - Raw response from BRENDA
|
||||||
|
- **Returns**: List of individual data entries
|
||||||
|
- **Features**: Handles both string and complex object responses
|
||||||
|
|
||||||
|
## Data Structures and Parsing
|
||||||
|
|
||||||
|
### Km Entry Structure
|
||||||
|
|
||||||
|
**Parsed Km Entry Dictionary:**
|
||||||
|
```python
|
||||||
|
{
|
||||||
|
'ecNumber': '1.1.1.1',
|
||||||
|
'organism': 'Escherichia coli',
|
||||||
|
'substrate': 'ethanol',
|
||||||
|
'kmValue': '0.12',
|
||||||
|
'km_value_numeric': 0.12, # Extracted numeric value
|
||||||
|
'kmValueMaximum': '',
|
||||||
|
'commentary': 'pH 7.4, 25°C',
|
||||||
|
'ph': 7.4, # Extracted from commentary
|
||||||
|
'temperature': 25.0, # Extracted from commentary
|
||||||
|
'ligandStructureId': '',
|
||||||
|
'literature': ''
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Reaction Entry Structure
|
||||||
|
|
||||||
|
**Parsed Reaction Entry Dictionary:**
|
||||||
|
```python
|
||||||
|
{
|
||||||
|
'ecNumber': '1.1.1.1',
|
||||||
|
'organism': 'Saccharomyces cerevisiae',
|
||||||
|
'reaction': 'ethanol + NAD+ <=> acetaldehyde + NADH + H+',
|
||||||
|
'reactants': ['ethanol', 'NAD+'],
|
||||||
|
'products': ['acetaldehyde', 'NADH', 'H+'],
|
||||||
|
'commentary': '',
|
||||||
|
'literature': ''
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Query Patterns and Examples
|
||||||
|
|
||||||
|
### Basic Queries
|
||||||
|
|
||||||
|
**Get all Km values for an enzyme:**
|
||||||
|
```python
|
||||||
|
from brenda_client import get_km_values
|
||||||
|
|
||||||
|
# Get all alcohol dehydrogenase Km values
|
||||||
|
km_data = get_km_values("1.1.1.1")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Get Km values for specific organism:**
|
||||||
|
```python
|
||||||
|
# Get human alcohol dehydrogenase Km values
|
||||||
|
human_km = get_km_values("1.1.1.1", organism="Homo sapiens")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Get Km values for specific substrate:**
|
||||||
|
```python
|
||||||
|
# Get Km for ethanol oxidation
|
||||||
|
ethanol_km = get_km_values("1.1.1.1", substrate="ethanol")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Wildcard Searches
|
||||||
|
|
||||||
|
**Search for enzyme families:**
|
||||||
|
```python
|
||||||
|
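# All enzymes in sub-subclass EC 1.1.1 (oxidoreductases acting on CH-OH donors with NAD+/NADP+ as acceptor), which includes the alcohol dehydrogenases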
|
||||||
|
alcohol_dehydrogenases = get_km_values("1.1.1.*")
|
||||||
|
|
||||||
|
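# All enzymes in sub-subclass EC 2.7.1 (phosphotransferases with an alcohol group as acceptor), which includes the hexokinases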
|
||||||
|
hexokinases = get_km_values("2.7.1.*")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Search for organism groups:**
|
||||||
|
```python
|
||||||
|
# All E. coli strains
|
||||||
|
e_coli_enzymes = get_km_values("*", organism="Escherichia coli")
|
||||||
|
|
||||||
|
# All Bacillus species
|
||||||
|
bacillus_enzymes = get_km_values("*", organism="Bacillus*")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Combined Searches
|
||||||
|
|
||||||
|
**Specific enzyme-substrate combination:**
|
||||||
|
```python
|
||||||
|
# Get Km values for glucose oxidation in yeast
|
||||||
|
glucose_km = get_km_values("1.1.1.1",
|
||||||
|
organism="Saccharomyces cerevisiae",
|
||||||
|
substrate="glucose")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Reaction Queries
|
||||||
|
|
||||||
|
**Get all reactions for an enzyme:**
|
||||||
|
```python
|
||||||
|
from brenda_client import get_reactions
|
||||||
|
|
||||||
|
reactions = get_reactions("1.1.1.1")
|
||||||
|
```
|
||||||
|
|
||||||
|
**Search for reactions with specific substrates:**
|
||||||
|
```python
|
||||||
|
# Find reactions involving glucose
|
||||||
|
glucose_reactions = get_reactions("*", reaction="*glucose*")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Data Analysis Patterns
|
||||||
|
|
||||||
|
### Kinetic Parameter Analysis
|
||||||
|
|
||||||
|
**Extract numeric Km values:**
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import parse_km_entry
|
||||||
|
|
||||||
|
km_data = get_km_values("1.1.1.1", substrate="ethanol")
|
||||||
|
numeric_kms = []
|
||||||
|
|
||||||
|
for entry in km_data:
|
||||||
|
parsed = parse_km_entry(entry)
|
||||||
|
if 'km_value_numeric' in parsed:
|
||||||
|
numeric_kms.append(parsed['km_value_numeric'])
|
||||||
|
|
||||||
|
if numeric_kms:
|
||||||
|
print(f"Average Km: {sum(numeric_kms)/len(numeric_kms):.3f}")
|
||||||
|
print(f"Range: {min(numeric_kms):.3f} - {max(numeric_kms):.3f}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Organism Comparison
|
||||||
|
|
||||||
|
**Compare enzyme properties across organisms:**
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import compare_across_organisms
|
||||||
|
|
||||||
|
organisms = ["Escherichia coli", "Saccharomyces cerevisiae", "Homo sapiens"]
|
||||||
|
comparison = compare_across_organisms("1.1.1.1", organisms)
|
||||||
|
|
||||||
|
for org_data in comparison:
|
||||||
|
if org_data.get('data_points', 0) > 0:
|
||||||
|
print(f"{org_data['organism']}: {org_data['average_km']:.3f}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Substrate Specificity
|
||||||
|
|
||||||
|
**Analyze substrate preferences:**
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import get_substrate_specificity
|
||||||
|
|
||||||
|
specificity = get_substrate_specificity("1.1.1.1")
|
||||||
|
|
||||||
|
for substrate_data in specificity[:5]: # Top 5
|
||||||
|
print(f"{substrate_data['name']}: Km = {substrate_data['km']:.3f}")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Integration Examples
|
||||||
|
|
||||||
|
### Metabolic Pathway Construction
|
||||||
|
|
||||||
|
**Build enzymatic pathway:**
|
||||||
|
```python
|
||||||
|
from scripts.enzyme_pathway_builder import find_pathway_for_product
|
||||||
|
|
||||||
|
# Find pathway for lactate production
|
||||||
|
pathway = find_pathway_for_product("lactate", max_steps=3)
|
||||||
|
|
||||||
|
for step in pathway['steps']:
|
||||||
|
print(f"Step {step['step_number']}: {step['substrate']} -> {step['product']}")
|
||||||
|
print(f"Enzymes available: {len(step['enzymes'])}")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Enzyme Engineering Support
|
||||||
|
|
||||||
|
**Find thermostable variants:**
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import find_thermophilic_homologs
|
||||||
|
|
||||||
|
thermophilic = find_thermophilic_homologs("1.1.1.1", min_temp=50)
|
||||||
|
|
||||||
|
for enzyme in thermophilic:
|
||||||
|
print(f"{enzyme['organism']}: {enzyme['optimal_temperature']}°C")
|
||||||
|
```
|
||||||
|
|
||||||
|
### Kinetic Modeling
|
||||||
|
|
||||||
|
**Extract parameters for modeling:**
|
||||||
|
```python
|
||||||
|
from scripts.brenda_queries import get_modeling_parameters
|
||||||
|
|
||||||
|
model_data = get_modeling_parameters("1.1.1.1", substrate="ethanol")
|
||||||
|
|
||||||
|
print(f"Km: {model_data['km']}")
|
||||||
|
print(f"Vmax: {model_data['vmax']}")
|
||||||
|
print(f"Optimal conditions: pH {model_data['ph']}, {model_data['temperature']}°C")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
**Authentication Errors:**
|
||||||
|
- Check BRENDA_EMAIL and BRENDA_PASSWORD environment variables
|
||||||
|
- Verify account is active and has API access
|
||||||
|
- Note legacy BRENDA_EMIAL support (typo in variable name)
|
||||||
|
|
||||||
|
**No Data Returned:**
|
||||||
|
- Verify EC number format (e.g., "1.1.1.1", not "1.1.1")
|
||||||
|
- Check spelling of organism and substrate names
|
||||||
|
- Try wildcards for broader searches
|
||||||
|
- Some enzymes may have limited data in BRENDA
|
||||||
|
|
||||||
|
**Rate Limiting:**
|
||||||
|
- Implement delays between requests
|
||||||
|
- Cache results locally
|
||||||
|
- Use more specific queries to reduce data volume
|
||||||
|
- Consider batch operations
|
||||||
|
|
||||||
|
**Data Format Issues:**
|
||||||
|
- Use provided parsing functions
|
||||||
|
- Handle missing fields gracefully
|
||||||
|
- BRENDA data format can be inconsistent
|
||||||
|
- Validate parsed data before use
|
||||||
|
|
||||||
|
### Performance Optimization
|
||||||
|
|
||||||
|
**Query Efficiency:**
|
||||||
|
- Use specific EC numbers when known
|
||||||
|
- Limit by organism or substrate to reduce result size
|
||||||
|
- Cache frequently accessed data
|
||||||
|
- Batch similar requests
|
||||||
|
|
||||||
|
**Memory Management:**
|
||||||
|
- Process large datasets in chunks
|
||||||
|
- Use generators for large result sets
|
||||||
|
- Clear parsed data when no longer needed
|
||||||
|
|
||||||
|
**Network Optimization:**
|
||||||
|
- Implement retry logic for network errors
|
||||||
|
- Use appropriate timeouts
|
||||||
|
- Monitor request frequency
|
||||||
|
|
||||||
|
## Additional Resources
|
||||||
|
|
||||||
|
### Official Documentation
|
||||||
|
|
||||||
|
- **BRENDA Website**: https://www.brenda-enzymes.org/
|
||||||
|
- **SOAP API Documentation**: https://www.brenda-enzymes.org/soap.php
|
||||||
|
- **Enzyme Nomenclature**: https://www.iubmb.org/enzyme/
|
||||||
|
- **EC Number Database**: https://www.qmul.ac.uk/sbcs/iubmb/enzyme/
|
||||||
|
|
||||||
|
### Related Libraries
|
||||||
|
|
||||||
|
- **Zeep (SOAP Client)**: https://python-zeep.readthedocs.io/
|
||||||
|
- **PubChemPy**: https://pubchempy.readthedocs.io/
|
||||||
|
- **BioPython**: https://biopython.org/
|
||||||
|
- **RDKit**: https://www.rdkit.org/
|
||||||
|
|
||||||
|
### Data Formats
|
||||||
|
|
||||||
|
- **Enzyme Commission Numbers**: IUBMB enzyme classification
|
||||||
|
- **IUPAC Nomenclature**: Chemical naming conventions
|
||||||
|
- **Biochemical Reactions**: Standard equation notation
|
||||||
|
- **Kinetic Parameters**: Michaelis-Menten kinetics
|
||||||
|
|
||||||
|
### Community Resources
|
||||||
|
|
||||||
|
- **BRENDA Help Desk**: Support via official website
|
||||||
|
- **Bioinformatics Forums**: Stack Overflow, Biostars
|
||||||
|
- **GitHub Issues**: Project-specific bug reports
|
||||||
|
- **Research Papers**: Primary literature for enzyme data
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*This API reference covers the core functionality of the BRENDA SOAP API and Python client. For complete details on available data fields and query patterns, consult the official BRENDA documentation.*
|
||||||
844
scientific-skills/brenda-database/scripts/brenda_queries.py
Normal file
844
scientific-skills/brenda-database/scripts/brenda_queries.py
Normal file
@@ -0,0 +1,844 @@
|
|||||||
|
"""
|
||||||
|
BRENDA Database Query Utilities
|
||||||
|
|
||||||
|
This module provides high-level functions for querying and analyzing
|
||||||
|
enzyme data from the BRENDA database using the SOAP API.
|
||||||
|
|
||||||
|
Key features:
|
||||||
|
- Parse BRENDA response data entries
|
||||||
|
- Search for enzymes by substrate/product
|
||||||
|
- Compare enzyme properties across organisms
|
||||||
|
- Retrieve kinetic parameters and environmental conditions
|
||||||
|
- Analyze substrate specificity and inhibition
|
||||||
|
- Support for enzyme engineering and pathway design
|
||||||
|
- Export data in various formats
|
||||||
|
|
||||||
|
Installation:
|
||||||
|
uv pip install zeep requests pandas
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from scripts.brenda_queries import search_enzymes_by_substrate, compare_across_organisms
|
||||||
|
|
||||||
|
enzymes = search_enzymes_by_substrate("glucose", limit=20)
|
||||||
|
comparison = compare_across_organisms("1.1.1.1", ["Escherichia coli", "Saccharomyces cerevisiae"])
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
import csv
|
||||||
|
from typing import List, Dict, Any, Optional, Tuple
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
try:
|
||||||
|
from zeep import Client, Settings
|
||||||
|
from zeep.exceptions import Fault, TransportError
|
||||||
|
ZEEP_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
print("Warning: zeep not installed. Install with: uv pip install zeep")
|
||||||
|
ZEEP_AVAILABLE = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
import requests
|
||||||
|
REQUESTS_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
print("Warning: requests not installed. Install with: uv pip install requests")
|
||||||
|
REQUESTS_AVAILABLE = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
import pandas as pd
|
||||||
|
PANDAS_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
print("Warning: pandas not installed. Install with: uv pip install pandas")
|
||||||
|
PANDAS_AVAILABLE = False
|
||||||
|
|
||||||
|
# Import the brenda_client from the project root
|
||||||
|
import sys
|
||||||
|
sys.path.append(str(Path(__file__).parent.parent.parent.parent))
|
||||||
|
|
||||||
|
try:
|
||||||
|
from brenda_client import get_km_values, get_reactions, call_brenda
|
||||||
|
BRENDA_CLIENT_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
print("Warning: brenda_client not available")
|
||||||
|
BRENDA_CLIENT_AVAILABLE = False
|
||||||
|
|
||||||
|
|
||||||
|
def validate_dependencies():
    """Raise ImportError if any dependency flagged as unavailable at import time.

    Checks the module-level availability flags for ``zeep``, ``requests`` and
    ``brenda_client`` (in that order) and reports every missing name in a
    single error message.

    Raises:
        ImportError: If at least one of the three flags is false.
    """
    availability = (
        ("zeep", ZEEP_AVAILABLE),
        ("requests", REQUESTS_AVAILABLE),
        ("brenda_client", BRENDA_CLIENT_AVAILABLE),
    )
    missing = [name for name, is_available in availability if not is_available]
    if missing:
        raise ImportError(f"Missing required dependencies: {', '.join(missing)}")
|
||||||
|
|
||||||
|
|
||||||
|
def parse_km_entry(entry: str) -> Dict[str, Any]:
    """Parse a BRENDA Km value entry into structured data.

    The entry is split into fields on ``#``; each field is split into a key
    and a value on its first ``*``. Fields without a ``*`` are ignored.

    Derived keys added on top of the raw fields:

    * ``km_value_numeric`` - first number found in ``kmValue`` (sign, decimal
      point and exponent are honoured). Negative numbers are not stored,
      because a Michaelis constant cannot be negative.
    * ``ph`` - first number following "pH" in ``commentary``.
    * ``temperature`` - first number directly followed by "°C", "C" or
      "Celsius" in ``commentary``.

    Args:
        entry: Raw entry string such as
            ``"organism*Escherichia coli#kmValue*0.12#commentary*pH 7.4, 25°C"``.

    Returns:
        Dictionary of the raw fields plus any derived keys, or an empty
        dictionary when ``entry`` is empty or not a string.
    """
    if not entry or not isinstance(entry, str):
        return {}

    parsed: Dict[str, Any] = {}
    for part in entry.split('#'):
        if '*' in part:
            key, value = part.split('*', 1)
            parsed[key.strip()] = value.strip()

    # Extract the first numeric value from kmValue (typically in mM).
    if 'kmValue' in parsed:
        numeric_match = re.search(
            r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?', parsed['kmValue']
        )
        if numeric_match:
            km_numeric = float(numeric_match.group(0))
            # A negative Km is non-physical; keeping the sign in the pattern
            # stops "-999" from being read as 999.0.
            # NOTE(review): BRENDA reportedly uses -999 as a "no value"
            # placeholder - confirm against the SOAP documentation.
            if km_numeric >= 0:
                parsed['km_value_numeric'] = km_numeric

    if 'commentary' in parsed:
        commentary = parsed['commentary']

        # Require digits on both sides of the decimal point so that a
        # sentence-ending period ("pH 7.5.") is not captured.
        ph_match = re.search(r'pH\s*(\d+(?:\.\d+)?)', commentary)
        if ph_match:
            parsed['ph'] = float(ph_match.group(1))

        # Accept decimals ("37.5°C") and require the unit to end at a word
        # boundary so "2 Ca2+" or "5 CTP" are not read as temperatures.
        temp_match = re.search(
            r'(\d+(?:\.\d+)?)\s*°?\s*C(?:elsius)?\b', commentary
        )
        if temp_match:
            parsed['temperature'] = float(temp_match.group(1))

    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _split_reaction_side(side: str) -> List[str]:
    """Split one side of a reaction equation into compound names.

    Only a ``+`` surrounded by whitespace separates compounds, so charged
    species such as ``NAD+`` or ``H+`` stay intact. Empty pieces are dropped.
    """
    return [
        compound.strip()
        for compound in re.split(r'\s+\+\s+', side.strip())
        if compound.strip()
    ]


def parse_reaction_entry(entry: str) -> Dict[str, Any]:
    """Parse a BRENDA reaction entry into structured data.

    The entry is split into fields on ``#``; each field is split into a key
    and a value on its first ``*``. When a ``reaction`` field is present, the
    equation is split into two sides and the keys ``reactants`` and
    ``products`` (lists of compound names) are added.

    The reaction arrow is located as follows:

    1. The first whitespace-delimited ``<=>``, ``->`` or ``=`` is preferred,
       so an arrow embedded in a compound name (e.g. ``(1->4)-alpha-D-glucan``)
       is not mistaken for the separator.
    2. Otherwise the first bare ``<=>``, then ``->``, then ``=`` is used.
    3. If no arrow is found, the whole equation is treated as reactants and
       ``products`` is an empty list.

    Args:
        entry: Raw entry string such as
            ``"ecNumber*1.1.1.1#reaction*ethanol + NAD+ <=> acetaldehyde + NADH + H+"``.

    Returns:
        Dictionary of the raw fields plus ``reactants``/``products`` when a
        reaction is present, or an empty dictionary when ``entry`` is empty
        or not a string.
    """
    if not entry or not isinstance(entry, str):
        return {}

    parsed: Dict[str, Any] = {}
    for part in entry.split('#'):
        if '*' in part:
            key, value = part.split('*', 1)
            parsed[key.strip()] = value.strip()

    if 'reaction' in parsed:
        reaction = parsed['reaction']

        spaced_arrow = re.search(r'\s(?:<=>|->|=)\s', reaction)
        if spaced_arrow:
            reactants = reaction[:spaced_arrow.start()]
            products = reaction[spaced_arrow.end():]
        else:
            # Fall back to bare arrows; "<=>" is tested first because it
            # contains "=".
            for arrow in ('<=>', '->', '='):
                if arrow in reaction:
                    reactants, products = reaction.split(arrow, 1)
                    break
            else:
                reactants, products = reaction, ''

        parsed['reactants'] = _split_reaction_side(reactants)
        parsed['products'] = _split_reaction_side(products)

    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def extract_organism_data(entry: str) -> Dict[str, Any]:
    """Extract organism-specific information from a BRENDA entry.

    Entries containing the text ``kmValue`` are parsed with
    ``parse_km_entry``; all others are parsed with ``parse_reaction_entry``.

    Args:
        entry: Raw BRENDA entry string.

    Returns:
        A dictionary with the keys ``organism``, ``ec_number``, ``substrate``,
        ``km_value``, ``km_numeric``, ``ph``, ``temperature``, ``commentary``
        and ``literature``. Text fields that are missing default to ``''``
        and numeric fields to ``None``. An empty dictionary is returned when
        ``entry`` is empty, is not a string, or has no ``organism`` field.
    """
    # Same guard as the parsers: without it, `'kmValue' in entry` raises
    # TypeError for None and other non-string input.
    if not entry or not isinstance(entry, str):
        return {}

    parsed = parse_km_entry(entry) if 'kmValue' in entry else parse_reaction_entry(entry)

    if 'organism' not in parsed:
        return {}

    return {
        'organism': parsed['organism'],
        'ec_number': parsed.get('ecNumber', ''),
        'substrate': parsed.get('substrate', ''),
        'km_value': parsed.get('kmValue', ''),
        'km_numeric': parsed.get('km_value_numeric'),
        'ph': parsed.get('ph'),
        'temperature': parsed.get('temperature'),
        'commentary': parsed.get('commentary', ''),
        'literature': parsed.get('literature', ''),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def search_enzymes_by_substrate(substrate: str, limit: int = 50) -> List[Dict[str, Any]]:
    """Search for enzymes that act on a specific substrate.

    Queries Km values for every EC number (``"*"``) with the given substrate
    and returns one record per unique (EC number, organism) pair, in the
    order the entries are returned.

    Args:
        substrate: Substrate name passed to ``get_km_values``.
        limit: Maximum number of unique enzymes to return. The whole result
            set is scanned until this many unique pairs are found, so
            duplicate entries do not reduce the number of results.

    Returns:
        List of dictionaries with the keys ``ec_number``, ``organism``,
        ``substrate``, ``km_value``, ``km_numeric`` and ``commentary``.
        On a query or parsing error the error is printed and the records
        collected so far (possibly none) are returned.

    Raises:
        ImportError: Propagated from ``validate_dependencies``.
    """
    validate_dependencies()

    unique_enzymes: List[Dict[str, Any]] = []
    seen = set()

    try:
        km_data = get_km_values("*", substrate=substrate)
        time.sleep(0.5)  # Rate limiting

        for entry in km_data:
            if len(unique_enzymes) >= limit:
                break

            parsed = parse_km_entry(entry)
            if not parsed:
                continue

            # De-duplicate on EC number and organism while scanning, so the
            # limit applies to unique enzymes, not raw entries.
            key = (parsed.get('ecNumber', ''), parsed.get('organism', ''))
            if key in seen:
                continue
            seen.add(key)

            unique_enzymes.append({
                'ec_number': key[0],
                'organism': key[1],
                'substrate': parsed.get('substrate', ''),
                'km_value': parsed.get('kmValue', ''),
                'km_numeric': parsed.get('km_value_numeric'),
                'commentary': parsed.get('commentary', ''),
            })
    except Exception as e:
        # Deliberate best-effort: report the failure and return what we have.
        print(f"Error searching enzymes by substrate: {e}")

    return unique_enzymes
|
||||||
|
|
||||||
|
|
||||||
|
def search_enzymes_by_product(product: str, limit: int = 50) -> List[Dict[str, Any]]:
    """Search for enzymes that produce a specific product.

    Queries reactions for every EC number (``"*"``) whose equation contains
    ``product`` anywhere, then keeps only those where the name appears
    (case-insensitively, as a substring) in a parsed product.

    Args:
        product: Product name to look for.
        limit: Maximum number of matching enzymes to return. The whole result
            set is scanned until this many matches are found, so reactions
            that only contain the name on the reactant side do not reduce the
            number of results.

    Returns:
        List of dictionaries with the keys ``ec_number``, ``organism``,
        ``reaction``, ``reactants``, ``products`` and ``commentary``.
        On a query or parsing error the error is printed and the records
        collected so far (possibly none) are returned.

    Raises:
        ImportError: Propagated from ``validate_dependencies``.
    """
    validate_dependencies()

    enzymes: List[Dict[str, Any]] = []
    target = product.lower()

    try:
        # The wildcard query matches the name on either side of the equation;
        # the product-side check below narrows it down.
        reactions = get_reactions("*", reaction=f"*{product}*")
        time.sleep(0.5)  # Rate limiting

        for entry in reactions:
            if len(enzymes) >= limit:
                break

            parsed = parse_reaction_entry(entry)
            if not parsed or 'products' not in parsed:
                continue

            if any(target in prod.lower() for prod in parsed['products']):
                enzymes.append({
                    'ec_number': parsed.get('ecNumber', ''),
                    'organism': parsed.get('organism', ''),
                    'reaction': parsed.get('reaction', ''),
                    'reactants': parsed.get('reactants', []),
                    'products': parsed.get('products', []),
                    'commentary': parsed.get('commentary', ''),
                })
    except Exception as e:
        # Deliberate best-effort: report the failure and return what we have.
        print(f"Error searching enzymes by product: {e}")

    return enzymes
|
||||||
|
|
||||||
|
|
||||||
|
def compare_across_organisms(ec_number: str, organisms: List[str]) -> List[Dict[str, Any]]:
    """Compare enzyme properties across different organisms.

    For each organism, Km entries are fetched for ``ec_number`` and
    summarised.

    Args:
        ec_number: EC number passed to ``get_km_values``.
        organisms: Organism names to query, one request per name.

    Returns:
        One dictionary per organism, in input order. Each always has
        ``organism`` and ``ec_number``, plus one of:

        * statistics (``data_points``, ``average_km``, ``min_km``, ``max_km``,
          ``optimal_ph``, ``optimal_temperature``, ``temperature_range``) when
          entries were returned; a statistic is ``None`` when no entry
          supplied a value for it;
        * ``data_points`` of 0 and a ``note`` when nothing was returned;
        * an ``error`` message when the query or parsing raised.

    Raises:
        ImportError: Propagated from ``validate_dependencies``.
    """
    validate_dependencies()

    def _mean(values):
        # Arithmetic mean, or None for an empty list.
        return sum(values) / len(values) if values else None

    results = []

    for organism in organisms:
        try:
            entries = get_km_values(ec_number, organism=organism)
            time.sleep(0.5)  # Rate limiting

            if not entries:
                results.append({
                    'organism': organism,
                    'ec_number': ec_number,
                    'data_points': 0,
                    'note': 'No data found'
                })
                continue

            records = [parse_km_entry(item) for item in entries]
            km_values = [rec['km_value_numeric'] for rec in records if 'km_value_numeric' in rec]
            ph_values = [rec['ph'] for rec in records if 'ph' in rec]
            temp_values = [rec['temperature'] for rec in records if 'temperature' in rec]

            if temp_values:
                temp_span = (min(temp_values), max(temp_values))
            else:
                temp_span = None

            results.append({
                'organism': organism,
                'ec_number': ec_number,
                # Counts raw entries, including those without a numeric Km.
                'data_points': len(entries),
                'average_km': _mean(km_values),
                'min_km': min(km_values) if km_values else None,
                'max_km': max(km_values) if km_values else None,
                'optimal_ph': _mean(ph_values),
                'optimal_temperature': _mean(temp_values),
                'temperature_range': temp_span
            })

        except Exception as e:
            print(f"Error comparing organism {organism}: {e}")
            results.append({
                'organism': organism,
                'ec_number': ec_number,
                'error': str(e)
            })

    return results
|
||||||
|
|
||||||
|
|
||||||
|
def get_organisms_for_enzyme(ec_number: str) -> List[str]:
    """Return the distinct organism names found in an enzyme's Km records.

    Args:
        ec_number: EC number forwarded to ``get_km_values``.

    Returns:
        Organism names in sorted order, without duplicates. If the lookup or
        parsing raises, the error is printed and an empty list is returned.
    """
    validate_dependencies()

    try:
        raw_entries = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        records = (parse_km_entry(raw) for raw in raw_entries)
        names = {record['organism'] for record in records if 'organism' in record}
        return sorted(names)

    except Exception as e:
        print(f"Error getting organisms for enzyme {ec_number}: {e}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_environmental_parameters(ec_number: str) -> Dict[str, Any]:
    """Summarise the pH and temperature conditions reported with an enzyme's Km records.

    Measurement conditions come from the ``ph`` and ``temperature`` fields of
    each parsed Km entry. Stability ranges are a best-effort text match on the
    entry commentary; only the first range found of each kind is reported.

    Args:
        ec_number: EC number forwarded to ``get_km_values``.

    Returns:
        A dict with ``ec_number``, ``data_points``, ``ph_range``, ``optimal_ph``
        (mean of the reported pH values), ``optimal_temperature`` (mean of the
        reported temperatures), ``temperature_range``, ``stability_ph`` and
        ``temperature_stability``. Fields without data are ``None``. If anything
        raises, the error is printed and ``{'ec_number': ..., 'error': ...}`` is
        returned instead.
    """
    validate_dependencies()

    # ``\d*\.?\d+`` matches exactly one decimal number. A looser ``[\d.]+`` would
    # also capture a sentence-ending period ("ph 6.0-8.0."), making float() raise
    # and turning the whole result into an error dict.
    ph_range_pattern = re.compile(r'ph\s*(\d*\.?\d+)\s*[-–]\s*(\d*\.?\d+)')
    temp_range_pattern = re.compile(r'(\d+)\s*[-–]\s*(\d+)\s*°?c')

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        phs = []
        temperatures = []
        ph_stabilities = []
        temp_stabilities = []

        for entry in km_data:
            parsed = parse_km_entry(entry)

            if 'ph' in parsed:
                phs.append(parsed['ph'])
            if 'temperature' in parsed:
                temperatures.append(parsed['temperature'])

            # Stability information only appears as free text in the commentary.
            commentary = (parsed.get('commentary') or '').lower()
            if 'stable' not in commentary:
                continue

            if 'ph' in commentary:
                ph_range_match = ph_range_pattern.search(commentary)
                if ph_range_match:
                    ph_stabilities.append(
                        (float(ph_range_match.group(1)), float(ph_range_match.group(2)))
                    )

            if 'temp' in commentary or '°c' in commentary:
                temp_match = temp_range_pattern.search(commentary)
                if temp_match:
                    temp_stabilities.append(
                        (int(temp_match.group(1)), int(temp_match.group(2)))
                    )

        params = {
            'ec_number': ec_number,
            'data_points': len(km_data),
            'ph_range': (min(phs), max(phs)) if phs else None,
            'optimal_ph': sum(phs) / len(phs) if phs else None,
            'optimal_temperature': sum(temperatures) / len(temperatures) if temperatures else None,
            'temperature_range': (min(temperatures), max(temperatures)) if temperatures else None,
            'stability_ph': ph_stabilities[0] if ph_stabilities else None,
            'temperature_stability': temp_stabilities[0] if temp_stabilities else None
        }

        return params

    except Exception as e:
        print(f"Error getting environmental parameters for {ec_number}: {e}")
        return {'ec_number': ec_number, 'error': str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
def get_cofactor_requirements(ec_number: str) -> List[Dict[str, Any]]:
    """List well-known cofactors that appear among an enzyme's reactants.

    Each reactant of each parsed reaction entry is compared, case-insensitively
    and by substring, against a fixed list of common cofactor names.

    Args:
        ec_number: EC number forwarded to ``get_reactions``.

    Returns:
        One dict per distinct (cofactor name, organism) pair, in first-seen
        order, with keys ``name``, ``full_name``, ``type``, ``organism`` and
        ``ec_number``. Errors during retrieval are printed and whatever was
        collected up to that point is returned.
    """
    validate_dependencies()

    known_cofactors = (
        'NAD+', 'NADH', 'NADP+', 'NADPH',
        'ATP', 'ADP', 'AMP',
        'FAD', 'FADH2',
        'CoA', 'acetyl-CoA',
        'pyridoxal phosphate', 'PLP',
        'biotin',
        'heme', 'iron-sulfur',
    )
    found = []

    try:
        reaction_entries = get_reactions(ec_number)
        time.sleep(0.5)  # Rate limiting

        for raw in reaction_entries:
            record = parse_reaction_entry(raw)
            if not record or 'reactants' not in record:
                continue

            for reactant in record['reactants']:
                reactant_lc = reactant.lower()
                for name in known_cofactors:
                    if name.lower() not in reactant_lc:
                        continue
                    if 'NAD' in name:
                        category = 'oxidoreductase'
                    else:
                        category = 'other'
                    found.append({
                        'name': name,
                        'full_name': reactant,
                        'type': category,
                        'organism': record.get('organism', ''),
                        'ec_number': ec_number,
                    })

    except Exception as e:
        print(f"Error getting cofactor requirements for {ec_number}: {e}")

    # Keep only the first hit for every (name, organism) pair; dicts preserve
    # insertion order, so the first-seen ordering survives.
    first_by_key = {}
    for hit in found:
        first_by_key.setdefault((hit['name'], hit['organism']), hit)
    return list(first_by_key.values())
|
||||||
|
|
||||||
|
|
||||||
|
def get_substrate_specificity(ec_number: str) -> List[Dict[str, Any]]:
    """Aggregate an enzyme's Km records per substrate.

    Only entries that carry both a ``substrate`` and a ``km_value_numeric``
    field are used.

    Args:
        ec_number: EC number forwarded to ``get_km_values``.

    Returns:
        One dict per substrate with ``name``, ``km`` (mean Km), ``min_km``,
        ``max_km``, ``data_points``, ``organisms``, ``vmax``, ``kcat`` and
        ``kcat_km_ratio``, sorted by ascending mean Km (a falsy mean sorts
        last). Nothing in this function fills the Vmax/kcat lists, so those
        fields come back as ``None``. Errors are printed and the results
        gathered so far are returned.
    """
    validate_dependencies()

    def _sort_key(row):
        # Lower Km means better affinity; a missing/zero Km goes to the end.
        return row['km'] if row['km'] else float('inf')

    specificity = []

    try:
        raw_entries = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        per_substrate = {}

        for raw in raw_entries:
            record = parse_km_entry(raw)
            if 'substrate' not in record or 'km_value_numeric' not in record:
                continue

            name = record['substrate']
            bucket = per_substrate.setdefault(name, {
                'name': name,
                'km_values': [],
                'organisms': set(),
                'vmax_values': [],  # If available
                'kcat_values': [],  # If available
            })

            bucket['km_values'].append(record['km_value_numeric'])
            if 'organism' in record:
                bucket['organisms'].add(record['organism'])

        # Calculate summary statistics
        for name, bucket in per_substrate.items():
            km_list = bucket['km_values']
            if not km_list:
                continue

            vmax_list = bucket['vmax_values']
            kcat_list = bucket['kcat_values']
            specificity.append({
                'name': name,
                'km': sum(km_list) / len(km_list),
                'min_km': min(km_list),
                'max_km': max(km_list),
                'data_points': len(km_list),
                'organisms': list(bucket['organisms']),
                'vmax': sum(vmax_list) / len(vmax_list) if vmax_list else None,
                'kcat': sum(kcat_list) / len(kcat_list) if kcat_list else None,
                'kcat_km_ratio': None,  # Would need kcat data to calculate
            })

        specificity.sort(key=_sort_key)

    except Exception as e:
        print(f"Error getting substrate specificity for {ec_number}: {e}")

    return specificity
|
||||||
|
|
||||||
|
|
||||||
|
def compare_substrate_affinity(ec_number: str) -> List[Dict[str, Any]]:
    """Return per-substrate Km summaries for an enzyme.

    This is an alias: it delegates unchanged to ``get_substrate_specificity``.
    """
    affinity_table = get_substrate_specificity(ec_number)
    return affinity_table
|
||||||
|
|
||||||
|
|
||||||
|
def get_inhibitors(ec_number: str) -> List[Dict[str, Any]]:
    """Guess an enzyme's inhibitors by keyword-matching Km entry commentary.

    An entry is considered only if its lower-cased commentary contains one of
    the inhibition keywords; every name from a fixed list of common inhibitors
    found in that commentary is then reported. The match is approximate.

    Args:
        ec_number: EC number forwarded to ``get_km_values``.

    Returns:
        One dict per distinct (inhibitor name, organism) pair, in first-seen
        order, with keys ``name``, ``type``, ``organism``, ``ec_number`` and
        ``commentary``. Errors during retrieval are printed and whatever was
        collected up to that point is returned.
    """
    validate_dependencies()

    trigger_words = ('inhibited', 'inhibition', 'blocked', 'prevented', 'reduced')
    candidate_names = (
        'iodoacetate', 'n-ethylmaleimide', 'p-chloromercuribenzoate',
        'heavy metals', 'mercury', 'copper', 'zinc',
        'cyanide', 'azide', 'carbon monoxide',
        'edta', 'egta',
    )
    hits = []

    try:
        raw_entries = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        for raw in raw_entries:
            record = parse_km_entry(raw)
            text = record.get('commentary', '').lower()

            if not any(word in text for word in trigger_words):
                continue

            for name in candidate_names:
                if name not in text:
                    continue
                if 'iodoacetate' in name or 'maleimide' in name:
                    kind = 'irreversible'
                else:
                    kind = 'reversible'
                hits.append({
                    'name': name,
                    'type': kind,
                    'organism': record.get('organism', ''),
                    'ec_number': ec_number,
                    'commentary': record.get('commentary', ''),
                })

    except Exception as e:
        print(f"Error getting inhibitors for {ec_number}: {e}")

    # Keep only the first hit for every (name, organism) pair.
    first_by_key = {}
    for hit in hits:
        first_by_key.setdefault((hit['name'], hit['organism']), hit)
    return list(first_by_key.values())
|
||||||
|
|
||||||
|
|
||||||
|
def get_activators(ec_number: str) -> List[Dict[str, Any]]:
    """Guess an enzyme's activators by keyword-matching Km entry commentary.

    An entry is considered only if its lower-cased commentary contains one of
    the activation keywords; every name from a fixed list of common activators
    found in that commentary is then reported. The match is approximate.

    Args:
        ec_number: EC number forwarded to ``get_km_values``.

    Returns:
        One dict per distinct (activator name, organism) pair, in first-seen
        order, with keys ``name``, ``type`` ('metal ion', 'reducing agent' or
        'other'), ``mechanism`` ('allosteric', 'cofactor' or 'unknown'),
        ``organism``, ``ec_number`` and ``commentary``. Errors during retrieval
        are printed and whatever was collected up to that point is returned.
    """
    validate_dependencies()

    activator_keywords = ('activated', 'stimulated', 'enhanced', 'increased')
    common_activators = (
        'mg2+', 'mn2+', 'ca2+', 'zn2+',
        'k+', 'na+',
        'phosphate', 'pyrophosphate',
        'dithiothreitol', 'dtt',
        'β-mercaptoethanol',
    )
    activators = []

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        for entry in km_data:
            parsed = parse_km_entry(entry)
            commentary = (parsed.get('commentary') or '').lower()

            if not any(keyword in commentary for keyword in activator_keywords):
                continue

            # NOTE(review): the original expression read
            # "... else 'cofactor' else 'unknown'", which is not valid Python.
            # Testing for the word 'cofactor' is the presumed intent - confirm.
            if 'allosteric' in commentary:
                mechanism = 'allosteric'
            elif 'cofactor' in commentary:
                mechanism = 'cofactor'
            else:
                mechanism = 'unknown'

            for activator in common_activators:
                if activator not in commentary:
                    continue

                if '+' in activator:
                    activator_type = 'metal ion'
                elif ('dtt' in activator or 'dithiothreitol' in activator
                        or 'mercapto' in activator):
                    # 'dithiothreitol' is the spelled-out name of DTT, so it
                    # gets the same classification.
                    activator_type = 'reducing agent'
                else:
                    activator_type = 'other'

                activators.append({
                    'name': activator,
                    'type': activator_type,
                    'mechanism': mechanism,
                    'organism': parsed.get('organism', ''),
                    'ec_number': ec_number,
                    'commentary': parsed.get('commentary', '')
                })

    except Exception as e:
        print(f"Error getting activators for {ec_number}: {e}")

    # Remove duplicates, keeping the first hit per (name, organism) pair
    unique_activators = []
    seen = set()
    for activator in activators:
        key = (activator['name'], activator['organism'])
        if key not in seen:
            seen.add(key)
            unique_activators.append(activator)

    return unique_activators
|
||||||
|
|
||||||
|
|
||||||
|
def find_thermophilic_homologs(ec_number: str, min_temp: int = 50) -> List[Dict[str, Any]]:
    """Find organisms with a thermophile-looking name and high-temperature Km data.

    Organisms are pre-selected purely by name (substrings 'therm',
    'hypertherm' or 'pyro'); a pre-selected organism is reported only when the
    highest temperature among its Km entries is at least ``min_temp``.

    Args:
        ec_number: EC number to look up.
        min_temp: Minimum value the highest reported temperature must reach.

    Returns:
        One dict per qualifying organism with ``organism``, ``ec_number``,
        ``optimal_temperature`` (the highest reported temperature),
        ``temperature_range``, ``km`` (mean Km or ``None``) and
        ``data_points``. Errors are printed and the results gathered so far
        are returned.
    """
    validate_dependencies()

    name_hints = ('therm', 'hypertherm', 'pyro')
    thermophilic = []

    try:
        for organism in get_organisms_for_enzyme(ec_number):
            # Check if organism might be thermophilic based on name
            lowered = organism.lower()
            if not any(hint in lowered for hint in name_hints):
                continue

            # Get kinetic data to extract temperature information
            km_data = get_km_values(ec_number, organism=organism)
            time.sleep(0.2)  # Rate limiting

            temperatures = []
            kms = []
            for raw in km_data:
                record = parse_km_entry(raw)
                if 'temperature' in record:
                    temperatures.append(record['temperature'])
                if 'km_value_numeric' in record:
                    kms.append(record['km_value_numeric'])

            if not temperatures:
                continue

            hottest = max(temperatures)
            if hottest >= min_temp:
                thermophilic.append({
                    'organism': organism,
                    'ec_number': ec_number,
                    'optimal_temperature': hottest,
                    'temperature_range': (min(temperatures), hottest),
                    'km': sum(kms) / len(kms) if kms else None,
                    'data_points': len(km_data),
                })

    except Exception as e:
        print(f"Error finding thermophilic homologs for {ec_number}: {e}")

    return thermophilic
|
||||||
|
|
||||||
|
|
||||||
|
def find_ph_stable_variants(ec_number: str, min_ph: float = 8.0, max_ph: float = 6.0) -> List[Dict[str, Any]]:
    """Find organisms whose Km data was measured only at alkaline or only at acidic pH.

    An organism counts as alkaline when ``min_ph`` is truthy and its lowest
    reported pH is at least ``min_ph``. It counts as acidic when ``max_ph`` is
    truthy and its highest reported pH is at most ``max_ph``. Organisms passing
    either test are reported.

    Args:
        ec_number: EC number to look up.
        min_ph: Alkaline threshold applied to the lowest reported pH.
        max_ph: Acidic threshold applied to the highest reported pH.

    Returns:
        One dict per qualifying organism with ``organism``, ``ec_number``,
        ``ph_range``, ``optimal_ph`` (mean pH), ``km`` (mean Km or ``None``),
        ``stability_type`` ('alkaline' if the alkaline test passed, otherwise
        'acidic') and ``data_points``. Errors are printed and the results
        gathered so far are returned.
    """
    validate_dependencies()

    ph_stable = []

    try:
        for organism in get_organisms_for_enzyme(ec_number):
            km_data = get_km_values(ec_number, organism=organism)
            time.sleep(0.2)  # Rate limiting

            phs = []
            kms = []
            for raw in km_data:
                record = parse_km_entry(raw)
                if 'ph' in record:
                    phs.append(record['ph'])
                if 'km_value_numeric' in record:
                    kms.append(record['km_value_numeric'])

            if not phs:
                continue

            lowest, highest = min(phs), max(phs)
            is_alkaline_stable = min_ph and lowest >= min_ph
            is_acid_stable = max_ph and highest <= max_ph

            if not (is_alkaline_stable or is_acid_stable):
                continue

            ph_stable.append({
                'organism': organism,
                'ec_number': ec_number,
                'ph_range': (lowest, highest),
                'optimal_ph': sum(phs) / len(phs),
                'km': sum(kms) / len(kms) if kms else None,
                'stability_type': 'alkaline' if is_alkaline_stable else 'acidic',
                'data_points': len(km_data),
            })

    except Exception as e:
        print(f"Error finding pH-stable variants for {ec_number}: {e}")

    return ph_stable
|
||||||
|
|
||||||
|
|
||||||
|
def get_modeling_parameters(ec_number: str, substrate: str = None) -> Dict[str, Any]:
    """Collect averaged kinetic parameters in a shape suitable for kinetic modeling.

    Args:
        ec_number: EC number forwarded to ``get_km_values``.
        substrate: Optional substrate filter forwarded to ``get_km_values``;
            when omitted, Km values of all substrates are pooled.

    Returns:
        A dict with ``ec_number``, ``substrate`` ('various' when no filter was
        given), ``km`` (mean), ``km_std`` (population standard deviation),
        ``vmax`` and ``kcat`` (means of values found as ``vmax = N`` /
        ``kcat = N`` in the commentary, else ``None``), ``optimal_ph``,
        ``optimal_temperature``, ``data_points``, ``temperature`` (mean, or 25.0
        when unreported), ``ph`` (mean, or 7.0 when unreported), ``enzyme_conc``
        (fixed 1.0) and ``substrate_conc`` (``None``). When no data is found or
        anything raises, ``{'ec_number': ..., 'error': ...}`` is returned.
    """
    validate_dependencies()

    # ``\d*\.?\d+`` matches exactly one decimal number. A looser ``[\d.]+`` would
    # also capture a sentence-ending period ("kcat = 2.3."), making float() raise
    # and discarding every parameter for the enzyme.
    vmax_pattern = re.compile(r'vmax\s*=\s*(\d*\.?\d+)')
    kcat_pattern = re.compile(r'kcat\s*=\s*(\d*\.?\d+)')

    try:
        if substrate:
            km_data = get_km_values(ec_number, substrate=substrate)
        else:
            km_data = get_km_values(ec_number)

        time.sleep(0.5)  # Rate limiting

        if not km_data:
            return {'ec_number': ec_number, 'error': 'No kinetic data found'}

        # Extract modeling parameters
        kms = []
        phs = []
        temperatures = []
        v_max_values = []
        kcat_values = []

        for entry in km_data:
            parsed = parse_km_entry(entry)

            if 'km_value_numeric' in parsed:
                kms.append(parsed['km_value_numeric'])
            if 'ph' in parsed:
                phs.append(parsed['ph'])
            if 'temperature' in parsed:
                temperatures.append(parsed['temperature'])

            # Look for Vmax and kcat in commentary (rare in BRENDA)
            commentary = (parsed.get('commentary') or '').lower()
            vmax_match = vmax_pattern.search(commentary)
            if vmax_match:
                v_max_values.append(float(vmax_match.group(1)))

            kcat_match = kcat_pattern.search(commentary)
            if kcat_match:
                kcat_values.append(float(kcat_match.group(1)))

        # Compute each mean once; the standard deviation reuses the Km mean
        # instead of re-summing the list for every element.
        km_mean = sum(kms) / len(kms) if kms else None
        km_std = (sum((x - km_mean) ** 2 for x in kms) / len(kms)) ** 0.5 if kms else None
        ph_mean = sum(phs) / len(phs) if phs else None
        temperature_mean = sum(temperatures) / len(temperatures) if temperatures else None

        modeling_data = {
            'ec_number': ec_number,
            'substrate': substrate if substrate else 'various',
            'km': km_mean,
            'km_std': km_std,
            'vmax': sum(v_max_values) / len(v_max_values) if v_max_values else None,
            'kcat': sum(kcat_values) / len(kcat_values) if kcat_values else None,
            'optimal_ph': ph_mean,
            'optimal_temperature': temperature_mean,
            'data_points': len(km_data),
            'temperature': temperature_mean if temperatures else 25.0,  # Default to 25°C
            'ph': ph_mean if phs else 7.0,  # Default to pH 7.0
            'enzyme_conc': 1.0,  # Default enzyme concentration (μM)
            'substrate_conc': None,  # Would be set by user
        }

        return modeling_data

    except Exception as e:
        return {'ec_number': ec_number, 'error': str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
def export_kinetic_data(ec_number: str, format: str = 'csv', filename: str = None) -> str:
    """Export the parsed Km entries of an enzyme to a CSV, JSON or Excel file.

    Args:
        ec_number: EC number forwarded to ``get_km_values``.
        format: 'csv', 'json' or 'excel' (case-insensitive).
        filename: Target path. When omitted, a name of the form
            ``brenda_kinetic_data_<ec>.<ext>`` is generated, where ``<ext>`` is
            ``xlsx`` for the 'excel' format and the format name otherwise.

    Returns:
        The target filename. It is returned in every case - also when nothing
        was written because no data was found, the format is unsupported, or an
        error occurred (a message is printed in those cases).
    """
    validate_dependencies()

    export_format = str(format).lower()

    if not filename:
        # 'excel' is a format name, not a file extension: pandas picks its
        # writer engine from the extension and has none registered for ".excel".
        extension = 'xlsx' if export_format == 'excel' else format
        filename = f"brenda_kinetic_data_{ec_number.replace('.', '_')}.{extension}"

    if export_format not in ('csv', 'json', 'excel'):
        # Do not query the service or claim success for a format that would
        # never be written.
        print(f"Unsupported export format '{format}'; expected 'csv', 'json' or 'excel'")
        return filename

    try:
        # Get all kinetic data
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        if not km_data:
            print(f"No kinetic data found for EC {ec_number}")
            return filename

        # Parse all entries, dropping those the parser returns empty
        parsed_data = []
        for entry in km_data:
            parsed = parse_km_entry(entry)
            if parsed:
                parsed_data.append(parsed)

        # Export based on format
        if export_format == 'csv':
            if parsed_data:
                df = pd.DataFrame(parsed_data)
                df.to_csv(filename, index=False)
            else:
                with open(filename, 'w', newline='') as f:
                    f.write('No data found')

        elif export_format == 'json':
            with open(filename, 'w') as f:
                json.dump(parsed_data, f, indent=2, default=str)

        else:  # 'excel'
            if not PANDAS_AVAILABLE:
                print("pandas required for Excel export")
                return filename
            if not parsed_data:
                print(f"No kinetic data found for EC {ec_number}")
                return filename
            df = pd.DataFrame(parsed_data)
            df.to_excel(filename, index=False)

        print(f"Exported {len(parsed_data)} entries to {filename}")
        return filename

    except Exception as e:
        print(f"Error exporting data: {e}")
        return filename
|
||||||
|
|
||||||
|
|
||||||
|
def search_by_pattern(pattern: str, limit: int = 50) -> List[Dict[str, Any]]:
    """Search reactions whose text contains a pattern or keyword.

    The pattern is wrapped in ``*`` wildcards and passed to ``get_reactions``
    together with a ``"*"`` EC number.

    Args:
        pattern: Text to look for inside reaction entries.
        limit: Maximum number of raw reaction entries to inspect.

    Returns:
        One dict per successfully parsed entry with ``ec_number``,
        ``organism``, ``reaction``, ``reactants``, ``products`` and
        ``commentary``. Errors are printed and the results gathered so far are
        returned.
    """
    validate_dependencies()

    enzymes = []

    try:
        wildcard = f"*{pattern}*"
        reaction_entries = get_reactions("*", reaction=wildcard)
        time.sleep(0.5)  # Rate limiting

        for raw in reaction_entries[:limit]:
            record = parse_reaction_entry(raw)
            if not record:
                continue
            enzymes.append({
                'ec_number': record.get('ecNumber', ''),
                'organism': record.get('organism', ''),
                'reaction': record.get('reaction', ''),
                'reactants': record.get('reactants', []),
                'products': record.get('products', []),
                'commentary': record.get('commentary', ''),
            })

    except Exception as e:
        print(f"Error searching by pattern '{pattern}': {e}")

    return enzymes
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Demonstration run: three sample queries against the BRENDA service.
    # Any failure is reported on stdout instead of raising.
    print("BRENDA Database Query Examples")
    print("=" * 40)

    try:
        # Example 1: enzymes acting on a given substrate
        print("\n1. Searching enzymes for 'glucose':")
        enzymes = search_enzymes_by_substrate("glucose", limit=5)
        for hit in enzymes:
            print(f" EC {hit['ec_number']}: {hit['organism']}")
            print(f" Km: {hit['km_value']}")

        # Example 2: one enzyme compared across several organisms
        print("\n2. Comparing alcohol dehydrogenase (1.1.1.1) across organisms:")
        organisms = ["Escherichia coli", "Saccharomyces cerevisiae", "Homo sapiens"]
        comparison = compare_across_organisms("1.1.1.1", organisms)
        for row in comparison:
            if row.get('data_points', 0) <= 0:
                continue  # nothing to show for this organism
            print(f" {row['organism']}:")
            print(f" Avg Km: {row.get('average_km', 'N/A')}")
            print(f" Optimal pH: {row.get('optimal_ph', 'N/A')}")

        # Example 3: pH / temperature summary for one enzyme
        print("\n3. Environmental parameters for 1.1.1.1:")
        params = get_environmental_parameters("1.1.1.1")
        if params.get('data_points', 0) > 0:
            print(f" pH range: {params.get('ph_range', 'N/A')}")
            print(f" Temperature range: {params.get('temperature_range', 'N/A')}")

    except Exception as e:
        print(f"Example failed: {e}")
|
||||||
@@ -0,0 +1,772 @@
|
|||||||
|
"""
|
||||||
|
BRENDA Database Visualization Utilities
|
||||||
|
|
||||||
|
This module provides visualization functions for BRENDA enzyme data,
|
||||||
|
including kinetic parameters, environmental conditions, and pathway analysis.
|
||||||
|
|
||||||
|
Key features:
|
||||||
|
- Plot Km, kcat, and Vmax distributions
|
||||||
|
- Compare enzyme properties across organisms
|
||||||
|
- Visualize pH and temperature activity profiles
|
||||||
|
- Plot substrate specificity and affinity data
|
||||||
|
- Generate Michaelis-Menten curves
|
||||||
|
- Create heatmaps and correlation plots
|
||||||
|
- Support for pathway visualization
|
||||||
|
|
||||||
|
Installation:
|
||||||
|
uv pip install matplotlib seaborn pandas numpy
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from scripts.brenda_visualization import plot_kinetic_parameters, plot_michaelis_menten
|
||||||
|
|
||||||
|
plot_kinetic_parameters("1.1.1.1")
|
||||||
|
plot_michaelis_menten("1.1.1.1", substrate="ethanol")
|
||||||
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
|
import numpy as np
|
||||||
|
from typing import List, Dict, Any, Optional, Tuple
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
try:
|
||||||
|
import pandas as pd
|
||||||
|
PANDAS_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
print("Warning: pandas not installed. Install with: uv pip install pandas")
|
||||||
|
PANDAS_AVAILABLE = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
from brenda_queries import (
|
||||||
|
get_km_values, get_reactions, parse_km_entry, parse_reaction_entry,
|
||||||
|
compare_across_organisms, get_environmental_parameters,
|
||||||
|
get_substrate_specificity, get_modeling_parameters,
|
||||||
|
search_enzymes_by_substrate, search_by_pattern
|
||||||
|
)
|
||||||
|
BRENDA_QUERIES_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
print("Warning: brenda_queries not available")
|
||||||
|
BRENDA_QUERIES_AVAILABLE = False
|
||||||
|
|
||||||
|
|
||||||
|
# Set style for plots
|
||||||
|
plt.style.use('default')
|
||||||
|
sns.set_palette("husl")
|
||||||
|
|
||||||
|
|
||||||
|
def validate_dependencies():
    """Ensure pandas and the brenda_queries module were importable.

    Raises:
        ImportError: Naming every dependency whose availability flag is false.
    """
    availability = (
        ("pandas", PANDAS_AVAILABLE),
        ("brenda_queries", BRENDA_QUERIES_AVAILABLE),
    )
    missing = [name for name, is_available in availability if not is_available]
    if missing:
        raise ImportError(f"Missing required dependencies: {', '.join(missing)}")
|
||||||
|
|
||||||
|
|
||||||
|
def plot_kinetic_parameters(ec_number: str, save_path: str = None, show_plot: bool = True) -> str:
    """Plot Km distributions for an enzyme in a 2x2 panel figure.

    Panels: Km histogram with mean/median markers, mean Km of the ten
    organisms with the highest mean, mean Km of the ten substrates with the
    highest mean, and a Km box plot for the five organisms with the most
    entries.

    Args:
        ec_number: EC number forwarded to ``get_km_values``.
        save_path: If given, the figure is written there at 300 dpi.
        show_plot: Show the figure interactively; otherwise close it.

    Returns:
        ``save_path`` when provided; otherwise a suggested file name of the
        form ``kinetic_parameters_<ec>.png`` (nothing is written in that case).
        When no usable data exists or an error occurs, ``save_path`` is
        returned as given (possibly ``None``) after printing a message.
    """
    validate_dependencies()

    fig = None  # lets the error handler release a half-built figure
    try:
        # Get Km data
        km_data = get_km_values(ec_number)

        if not km_data:
            print(f"No kinetic data found for EC {ec_number}")
            return save_path

        # Parse data, keeping only entries with a numeric Km
        parsed_entries = []
        for entry in km_data:
            parsed = parse_km_entry(entry)
            if 'km_value_numeric' in parsed:
                parsed_entries.append(parsed)

        if not parsed_entries:
            print(f"No numeric Km data found for EC {ec_number}")
            return save_path

        # Create figure with subplots
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle(f'Kinetic Parameters for EC {ec_number}', fontsize=16, fontweight='bold')

        # Extract data
        km_values = [entry['km_value_numeric'] for entry in parsed_entries]
        organisms = [entry.get('organism', 'Unknown') for entry in parsed_entries]
        substrates = [entry.get('substrate', 'Unknown') for entry in parsed_entries]

        # Plot 1: Km distribution histogram
        mean_km = np.mean(km_values)
        median_km = np.median(km_values)
        ax1.hist(km_values, bins=30, alpha=0.7, edgecolor='black')
        ax1.set_xlabel('Km (mM)')
        ax1.set_ylabel('Frequency')
        ax1.set_title('Km Value Distribution')
        ax1.axvline(mean_km, color='red', linestyle='--', label=f'Mean: {mean_km:.2f}')
        ax1.axvline(median_km, color='blue', linestyle='--', label=f'Median: {median_km:.2f}')
        ax1.legend()

        if PANDAS_AVAILABLE:
            # One frame holding both grouping columns. Building a separate
            # Substrate-only frame under the same name would leave the box plot
            # below without its 'Organism' column.
            df = pd.DataFrame({'Km': km_values, 'Organism': organisms, 'Substrate': substrates})

            # Plot 2: Km by organism (top 10)
            organism_means = df.groupby('Organism')['Km'].mean().sort_values(ascending=False).head(10)
            organism_means.plot(kind='bar', ax=ax2)
            ax2.set_ylabel('Mean Km (mM)')
            ax2.set_title('Mean Km by Organism (Top 10)')
            ax2.tick_params(axis='x', rotation=45)

            # Plot 3: Km by substrate (top 10)
            substrate_means = df.groupby('Substrate')['Km'].mean().sort_values(ascending=False).head(10)
            substrate_means.plot(kind='bar', ax=ax3)
            ax3.set_ylabel('Mean Km (mM)')
            ax3.set_title('Mean Km by Substrate (Top 10)')
            ax3.tick_params(axis='x', rotation=45)

            # Plot 4: Box plot by organism (top 5 by number of entries)
            top_organisms = df.groupby('Organism')['Km'].count().sort_values(ascending=False).head(5).index
            top_data = df[df['Organism'].isin(top_organisms)]
            sns.boxplot(data=top_data, x='Organism', y='Km', ax=ax4)
            ax4.set_ylabel('Km (mM)')
            ax4.set_title('Km Distribution by Organism (Top 5)')
            ax4.tick_params(axis='x', rotation=45)

        plt.tight_layout()

        # Save plot
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Kinetic parameters plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"kinetic_parameters_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        print(f"Error plotting kinetic parameters: {e}")
        if fig is not None:
            plt.close(fig)  # do not leak the partially drawn figure
        return save_path
|
||||||
|
|
||||||
|
|
||||||
|
def _draw_organism_bars(ax, rows, key, ylabel, title):
    """Draw one bar per organism that reports *key*, keeping labels and heights paired."""
    pairs = [(row['organism'], row[key]) for row in rows if row.get(key) is not None]
    if not pairs:
        return  # nothing reported for this metric; leave the panel empty
    labels, heights = zip(*pairs)
    ax.bar(list(labels), list(heights))
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.tick_params(axis='x', rotation=45)


def plot_organism_comparison(ec_number: str, organisms: List[str], save_path: str = None, show_plot: bool = True) -> str:
    """Compare an enzyme's properties across organisms in a 2x2 bar-chart figure.

    Panels: average Km, optimal pH, optimal temperature and number of data
    points, using the per-organism summaries from ``compare_across_organisms``.
    Organisms without data points are left out entirely; within each panel,
    organisms that do not report that particular metric are skipped.

    Args:
        ec_number: EC number to compare.
        organisms: Organism names forwarded to ``compare_across_organisms``.
        save_path: If given, the figure is written there at 300 dpi.
        show_plot: Show the figure interactively; otherwise close it.

    Returns:
        ``save_path`` when provided; otherwise a suggested file name of the
        form ``organism_comparison_<ec>.png`` (nothing is written in that
        case). When no usable data exists or an error occurs, ``save_path`` is
        returned as given (possibly ``None``) after printing a message.
    """
    validate_dependencies()

    fig = None  # lets the error handler release a half-built figure
    try:
        # Get comparison data
        comparison = compare_across_organisms(ec_number, organisms)

        if not comparison:
            print(f"No comparison data found for EC {ec_number}")
            return save_path

        # Filter out entries with no data
        valid_data = [c for c in comparison if c.get('data_points', 0) > 0]

        if not valid_data:
            print(f"No valid data for organism comparison of EC {ec_number}")
            return save_path

        # Create figure
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle(f'Organism Comparison for EC {ec_number}', fontsize=16, fontweight='bold')

        # Plots 1-3: each metric is paired with the organisms that actually
        # report it, so the label and value lists always have equal length.
        _draw_organism_bars(ax1, valid_data, 'average_km',
                            'Average Km (mM)', 'Average Km Comparison')
        _draw_organism_bars(ax2, valid_data, 'optimal_ph',
                            'Optimal pH', 'Optimal pH Comparison')
        _draw_organism_bars(ax3, valid_data, 'optimal_temperature',
                            'Optimal Temperature (°C)', 'Optimal Temperature Comparison')

        # Plot 4: Data points comparison
        names = [c['organism'] for c in valid_data]
        data_points = [c.get('data_points', 0) for c in valid_data]
        ax4.bar(names, data_points)
        ax4.set_ylabel('Number of Data Points')
        ax4.set_title('Available Data Points')
        ax4.tick_params(axis='x', rotation=45)

        plt.tight_layout()

        # Save plot
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Organism comparison plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"organism_comparison_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        print(f"Error plotting organism comparison: {e}")
        if fig is not None:
            plt.close(fig)  # do not leak the partially drawn figure
        return save_path
|
||||||
|
|
||||||
|
|
||||||
|
def plot_pH_profiles(ec_number: str, save_path: str = None, show_plot: bool = True) -> str:
    """Plot Km against pH, plus the pH distribution, for one enzyme.

    Km entries are fetched with ``get_km_values`` and parsed with
    ``parse_km_entry``; only entries whose parsed form contains both ``'ph'``
    and ``'km_value_numeric'`` are used. The figure has two panels: a
    pH-vs-Km scatter (with a linear trend line when more than two points are
    available) and a histogram of the pH values with mean/median markers.

    Args:
        ec_number: EC number of the enzyme, e.g. ``"1.1.1.1"``.
        save_path: File to write the figure to. Nothing is written when this
            is ``None``.
        show_plot: Call ``plt.show()`` when true; otherwise close the figure.

    Returns:
        ``save_path`` when it was given, otherwise a default file name built
        from ``ec_number`` (that default name is only returned, no file is
        written under it). When no usable data exists or an error occurs,
        ``save_path`` is returned unchanged, which may be ``None``.
    """
    validate_dependencies()

    fig = None  # bound once the figure exists so the error path can release it
    try:
        # Get kinetic data
        km_data = get_km_values(ec_number)

        if not km_data:
            print(f"No pH data found for EC {ec_number}")
            return save_path

        # Keep only the entries that report both a pH and a numeric Km.
        ph_kms = []
        for entry in km_data:
            parsed = parse_km_entry(entry)
            if 'ph' in parsed and 'km_value_numeric' in parsed:
                ph_kms.append((parsed['ph'], parsed['km_value_numeric']))

        if not ph_kms:
            print(f"No pH-Km data found for EC {ec_number}")
            return save_path

        # Create figure
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        fig.suptitle(f'pH Activity Profiles for EC {ec_number}', fontsize=16, fontweight='bold')

        ph_values = [ph for ph, _ in ph_kms]
        km_values = [km for _, km in ph_kms]

        # Plot 1: pH vs Km scatter plot
        ax1.scatter(ph_values, km_values, alpha=0.6, s=50)
        ax1.set_xlabel('pH')
        ax1.set_ylabel('Km (mM)')
        ax1.set_title('pH vs Km Values')
        ax1.grid(True, alpha=0.3)

        # Add a first-degree least-squares trend line once there are enough points.
        if len(ph_values) > 2:
            z = np.polyfit(ph_values, km_values, 1)
            p = np.poly1d(z)
            ax1.plot(ph_values, p(ph_values), "r--", alpha=0.8, label=f'Trend: y={z[0]:.3f}x+{z[1]:.3f}')
            ax1.legend()

        # Plot 2: pH distribution histogram
        mean_ph = np.mean(ph_values)
        median_ph = np.median(ph_values)
        ax2.hist(ph_values, bins=20, alpha=0.7, edgecolor='black')
        ax2.set_xlabel('pH')
        ax2.set_ylabel('Frequency')
        ax2.set_title('pH Distribution')
        ax2.axvline(mean_ph, color='red', linestyle='--', label=f'Mean: {mean_ph:.2f}')
        ax2.axvline(median_ph, color='blue', linestyle='--', label=f'Median: {median_ph:.2f}')
        ax2.legend()

        plt.tight_layout()

        # Save plot
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"pH profile plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"ph_profile_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        print(f"Error plotting pH profiles: {e}")
        # Do not leave a half-built figure registered with pyplot; repeated
        # failures in a batch run would otherwise accumulate open figures.
        if fig is not None:
            plt.close(fig)
        return save_path
|
||||||
|
|
||||||
|
|
||||||
|
def plot_temperature_profiles(ec_number: str, save_path: str = None, show_plot: bool = True) -> str:
    """Plot Km against temperature, plus the temperature distribution.

    Km entries are fetched with ``get_km_values`` and parsed with
    ``parse_km_entry``; only entries whose parsed form contains both
    ``'temperature'`` and ``'km_value_numeric'`` are used. The left panel is a
    temperature-vs-Km scatter; with more than two points a quadratic
    least-squares fit is overlaid and the temperature at which the fitted
    curve is lowest (within the observed range) is marked and labelled
    "Optimal". The right panel is a histogram of the temperatures with
    mean/median markers.

    Args:
        ec_number: EC number of the enzyme, e.g. ``"1.1.1.1"``.
        save_path: File to write the figure to. Nothing is written when this
            is ``None``.
        show_plot: Call ``plt.show()`` when true; otherwise close the figure.

    Returns:
        ``save_path`` when it was given, otherwise a default file name built
        from ``ec_number`` (that default name is only returned, no file is
        written under it). When no usable data exists or an error occurs,
        ``save_path`` is returned unchanged, which may be ``None``.
    """
    validate_dependencies()

    fig = None  # bound once the figure exists so the error path can release it
    try:
        # Get kinetic data
        km_data = get_km_values(ec_number)

        if not km_data:
            print(f"No temperature data found for EC {ec_number}")
            return save_path

        # Keep only the entries that report both a temperature and a numeric Km.
        temp_kms = []
        for entry in km_data:
            parsed = parse_km_entry(entry)
            if 'temperature' in parsed and 'km_value_numeric' in parsed:
                temp_kms.append((parsed['temperature'], parsed['km_value_numeric']))

        if not temp_kms:
            print(f"No temperature-Km data found for EC {ec_number}")
            return save_path

        # Create figure
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        fig.suptitle(f'Temperature Activity Profiles for EC {ec_number}', fontsize=16, fontweight='bold')

        temp_values = [temp for temp, _ in temp_kms]
        km_values = [km for _, km in temp_kms]

        # Plot 1: Temperature vs Km scatter plot
        ax1.scatter(temp_values, km_values, alpha=0.6, s=50)
        ax1.set_xlabel('Temperature (°C)')
        ax1.set_ylabel('Km (mM)')
        ax1.set_title('Temperature vs Km Values')
        ax1.grid(True, alpha=0.3)

        # Add trend line
        if len(temp_values) > 2:
            z = np.polyfit(temp_values, km_values, 2)  # Quadratic fit for temperature optima
            p = np.poly1d(z)
            x_smooth = np.linspace(min(temp_values), max(temp_values), 100)
            fitted = p(x_smooth)
            ax1.plot(x_smooth, fitted, "r--", alpha=0.8, label='Polynomial fit')

            # The marked "optimum" is where the fitted Km curve is lowest on
            # the sampled grid, i.e. restricted to the observed range.
            optimum_temp = x_smooth[np.argmin(fitted)]
            ax1.axvline(optimum_temp, color='green', linestyle=':', label=f'Optimal: {optimum_temp:.1f}°C')
            ax1.legend()

        # Plot 2: Temperature distribution histogram
        mean_temp = np.mean(temp_values)
        median_temp = np.median(temp_values)
        ax2.hist(temp_values, bins=20, alpha=0.7, edgecolor='black')
        ax2.set_xlabel('Temperature (°C)')
        ax2.set_ylabel('Frequency')
        ax2.set_title('Temperature Distribution')
        ax2.axvline(mean_temp, color='red', linestyle='--', label=f'Mean: {mean_temp:.1f}°C')
        ax2.axvline(median_temp, color='blue', linestyle='--', label=f'Median: {median_temp:.1f}°C')
        ax2.legend()

        plt.tight_layout()

        # Save plot
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Temperature profile plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"temperature_profile_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        print(f"Error plotting temperature profiles: {e}")
        # Do not leave a half-built figure registered with pyplot; repeated
        # failures in a batch run would otherwise accumulate open figures.
        if fig is not None:
            plt.close(fig)
        return save_path
|
||||||
|
|
||||||
|
|
||||||
|
def plot_substrate_specificity(ec_number: str, save_path: str = None, show_plot: bool = True) -> str:
    """Plot substrate specificity and affinity for an enzyme.

    Data comes from ``get_substrate_specificity``; each record is read for
    its ``'name'``, ``'km'`` and ``'data_points'`` fields. Records without a
    truthy ``'km'`` are left out of every panel so that the name, Km and
    data-point columns always stay aligned.

    The 2x2 figure shows, for the (up to) 15 substrates with the most data
    points: Km per substrate (sorted ascending) and data points per
    substrate; and for all substrates with a Km: the Km histogram and a
    Km-vs-data-points scatter. The panels are only drawn when pandas is
    available and at least one Km exists; otherwise the axes stay empty.

    Args:
        ec_number: EC number of the enzyme, e.g. ``"1.1.1.1"``.
        save_path: File to write the figure to. Nothing is written when this
            is ``None``.
        show_plot: Call ``plt.show()`` when true; otherwise close the figure.

    Returns:
        ``save_path`` when it was given, otherwise a default file name built
        from ``ec_number`` (that default name is only returned, no file is
        written under it). When no data exists or an error occurs,
        ``save_path`` is returned unchanged, which may be ``None``.
    """
    validate_dependencies()

    fig = None  # bound once the figure exists so the error path can release it
    try:
        # Get substrate specificity data
        specificity = get_substrate_specificity(ec_number)

        if not specificity:
            print(f"No substrate specificity data found for EC {ec_number}")
            return save_path

        # Create figure
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle(f'Substrate Specificity for EC {ec_number}', fontsize=16, fontweight='bold')

        # Filter once and derive all three columns from the same rows.
        # Filtering only the Km list would give the DataFrame columns of
        # different lengths as soon as one substrate has no Km.
        with_km = [s for s in specificity if s.get('km')]
        substrates = [s['name'] for s in with_km]
        kms = [s['km'] for s in with_km]
        data_points = [s['data_points'] for s in with_km]

        if PANDAS_AVAILABLE and kms:
            df = pd.DataFrame({'Substrate': substrates, 'Km': kms, 'DataPoints': data_points})
            top_substrates = df.nlargest(15, 'DataPoints')  # Top 15 by data points

            top_sorted = top_substrates.sort_values('Km')
            positions = range(len(top_sorted))
            # Long substrate names are truncated so the tick labels stay readable.
            tick_labels = [s[:30] + '...' if len(s) > 30 else s for s in top_sorted['Substrate']]

            # Plot 1: Km values for top substrates (sorted by affinity)
            ax1.barh(positions, top_sorted['Km'])
            ax1.set_yticks(positions)
            ax1.set_yticklabels(tick_labels)
            ax1.set_xlabel('Km (mM)')
            ax1.set_title('Substrate Affinity (Lower Km = Higher Affinity)')
            ax1.invert_yaxis()  # Best affinity at top

            # Plot 2: Data points by substrate
            ax2.barh(positions, top_sorted['DataPoints'])
            ax2.set_yticks(positions)
            ax2.set_yticklabels(tick_labels)
            ax2.set_xlabel('Number of Data Points')
            ax2.set_title('Data Availability by Substrate')
            ax2.invert_yaxis()

            # Plot 3: Km distribution
            mean_km = np.mean(kms)
            median_km = np.median(kms)
            ax3.hist(kms, bins=20, alpha=0.7, edgecolor='black')
            ax3.set_xlabel('Km (mM)')
            ax3.set_ylabel('Frequency')
            ax3.set_title('Km Value Distribution')
            ax3.axvline(mean_km, color='red', linestyle='--', label=f'Mean: {mean_km:.2f}')
            ax3.axvline(median_km, color='blue', linestyle='--', label=f'Median: {median_km:.2f}')
            ax3.legend()

            # Plot 4: Km vs Data Points scatter
            ax4.scatter(df['DataPoints'], df['Km'], alpha=0.6)
            ax4.set_xlabel('Number of Data Points')
            ax4.set_ylabel('Km (mM)')
            ax4.set_title('Km vs Data Points')
            ax4.grid(True, alpha=0.3)

        plt.tight_layout()

        # Save plot
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Substrate specificity plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"substrate_specificity_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        print(f"Error plotting substrate specificity: {e}")
        # Do not leave a half-built figure registered with pyplot; repeated
        # failures in a batch run would otherwise accumulate open figures.
        if fig is not None:
            plt.close(fig)
        return save_path
|
||||||
|
|
||||||
|
|
||||||
|
def plot_michaelis_menten(ec_number: str, substrate: str = None, save_path: str = None, show_plot: bool = True) -> str:
    """Generate Michaelis-Menten and Lineweaver-Burk plots for an enzyme.

    Parameters are read from the dict returned by ``get_modeling_parameters``
    (keys ``'km'``, ``'vmax'``, ``'kcat'``, ``'enzyme_conc'`` and ``'error'``).
    The Vmax used for the curve is chosen once, in this order: the reported
    ``vmax``; else ``kcat * enzyme_conc`` when both are truthy; else a
    normalised value of 1.0. The curve ``v = Vmax * [S] / (Km + [S])`` is
    evaluated on 1000 evenly spaced concentrations from 0 to 5 * Km.

    The left panel shows the curve with guide lines at Km and at half of
    Vmax, and a marker at their intersection (by definition the rate at
    ``[S] = Km`` is ``Vmax / 2``). The right panel shows the double-reciprocal
    points with a fitted straight line.

    Args:
        ec_number: EC number of the enzyme, e.g. ``"1.1.1.1"``.
        substrate: Substrate name forwarded to ``get_modeling_parameters``
            and appended to the figure title when given.
        save_path: File to write the figure to. Nothing is written when this
            is ``None``.
        show_plot: Call ``plt.show()`` when true; otherwise close the figure.

    Returns:
        ``save_path`` when it was given, otherwise a default file name built
        from ``ec_number`` and ``substrate`` (that default name is only
        returned, no file is written under it). When no modelling data or Km
        exists, or an error occurs, ``save_path`` is returned unchanged,
        which may be ``None``.
    """
    validate_dependencies()

    fig = None  # bound once the figure exists so the error path can release it
    try:
        # Get modeling parameters
        model_data = get_modeling_parameters(ec_number, substrate)

        if not model_data or model_data.get('error'):
            print(f"No modeling data found for EC {ec_number}")
            return save_path

        km = model_data.get('km')
        vmax = model_data.get('vmax')
        kcat = model_data.get('kcat')
        enzyme_conc = model_data.get('enzyme_conc', 1.0)

        if not km:
            print("No Km data available for plotting")
            return save_path

        # Decide on one Vmax and use it for the curve, the half-Vmax guide
        # line and the Km marker, so all three are guaranteed to agree.
        if vmax:
            effective_vmax = vmax
        elif kcat and enzyme_conc:
            effective_vmax = kcat * enzyme_conc
        else:
            effective_vmax = 1.0  # normalised curve
        half_vmax = 0.5 * effective_vmax

        # Create figure
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        fig.suptitle(f'Michaelis-Menten Kinetics for EC {ec_number}' + (f' - {substrate}' if substrate else ''),
                     fontsize=16, fontweight='bold')

        # Generate substrate concentration range and the matching rates
        substrate_range = np.linspace(0, km * 5, 1000)
        rates = (effective_vmax * substrate_range) / (km + substrate_range)

        # Plot 1: Michaelis-Menten curve
        ax1.plot(substrate_range, rates, 'b-', linewidth=2, label='Michaelis-Menten')
        # The guide line sits at half of Vmax itself; the last sampled rate
        # (at 5 * Km) is only 5/6 of Vmax and must not be used as a stand-in.
        ax1.axhline(y=half_vmax, color='r', linestyle='--', alpha=0.7, label='0.5 × Vmax')
        ax1.axvline(x=km, color='g', linestyle='--', alpha=0.7, label=f'Km = {km:.2f}')
        ax1.set_xlabel('Substrate Concentration (mM)')
        ax1.set_ylabel('Reaction Rate')
        ax1.set_title('Michaelis-Menten Curve')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Mark the point ([S] = Km, v = Vmax / 2) on the curve.
        ax1.plot(km, half_vmax, 'ro', markersize=8)

        # Plot 2: Lineweaver-Burk plot (double reciprocal); [S] = 0 is
        # excluded because its reciprocal is undefined.
        positive = substrate_range > 0
        reciprocal_substrate = 1 / substrate_range[positive]
        reciprocal_rate = 1 / rates[positive]

        ax2.scatter(reciprocal_substrate, reciprocal_rate, alpha=0.6, s=10)

        # Fit linear regression; the intercept z[1] estimates 1/Vmax.
        z = np.polyfit(reciprocal_substrate, reciprocal_rate, 1)
        p = np.poly1d(z)
        x_fit = np.linspace(min(reciprocal_substrate), max(reciprocal_substrate), 100)
        ax2.plot(x_fit, p(x_fit), 'r-', linewidth=2, label=f'1/Vmax = {z[1]:.3f}')

        ax2.set_xlabel('1/[Substrate] (1/mM)')
        ax2.set_ylabel('1/Rate')
        ax2.set_title('Lineweaver-Burk Plot')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        # Add parameter information
        info_text = f"Km = {km:.3f} mM"
        if vmax:
            info_text += f"\nVmax = {vmax:.3f}"
        if kcat:
            info_text += f"\nkcat = {kcat:.3f} s⁻¹"
        if enzyme_conc:
            info_text += f"\n[Enzyme] = {enzyme_conc:.3f} μM"

        fig.text(0.02, 0.98, info_text, transform=fig.transFigure,
                 fontsize=10, verticalalignment='top',
                 bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

        plt.tight_layout()

        # Save plot
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Michaelis-Menten plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"michaelis_menten_{ec_number.replace('.', '_')}_{substrate or 'all'}.png"

    except Exception as e:
        print(f"Error plotting Michaelis-Menten: {e}")
        # Do not leave a half-built figure registered with pyplot; repeated
        # failures in a batch run would otherwise accumulate open figures.
        if fig is not None:
            plt.close(fig)
        return save_path
|
||||||
|
|
||||||
|
|
||||||
|
def create_heatmap_data(ec_number: str, parameters: List[str] = None) -> Dict[str, Any]:
    """Collect per-organism enzyme properties as column lists for a heatmap.

    ``compare_across_organisms`` is queried for a fixed set of six model
    organisms. Every returned record whose ``'data_points'`` is greater than
    zero contributes one value to each output list; missing numeric fields
    are stored as 0.

    Args:
        ec_number: EC number of the enzyme, e.g. ``"1.1.1.1"``.
        parameters: Accepted but not used by this function.

    Returns:
        A dict with the keys ``'organisms'``, ``'average_km'``,
        ``'optimal_ph'``, ``'optimal_temperature'`` and ``'data_points'``,
        each mapping to a list of equal length. ``None`` is returned when the
        comparison yields nothing or when any error occurs.
    """
    validate_dependencies()

    numeric_fields = ('average_km', 'optimal_ph', 'optimal_temperature', 'data_points')

    try:
        # Fixed panel of model organisms to compare.
        panel = [
            "Escherichia coli", "Saccharomyces cerevisiae", "Bacillus subtilis",
            "Homo sapiens", "Mus musculus", "Rattus norvegicus",
        ]
        records = compare_across_organisms(ec_number, panel)
        if not records:
            return None

        table = {'organisms': []}
        for field in numeric_fields:
            table[field] = []

        for record in records:
            # Organisms without any data points are left out entirely.
            if not record.get('data_points', 0) > 0:
                continue
            table['organisms'].append(record['organism'])
            for field in numeric_fields:
                table[field].append(record.get(field, 0))

        return table

    except Exception as e:
        print(f"Error creating heatmap data: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def plot_heatmap(ec_number: str, save_path: str = None, show_plot: bool = True) -> str:
    """Create heatmap visualization of enzyme properties across organisms.

    The data comes from ``create_heatmap_data``. Two seaborn heatmaps are
    drawn side by side with organisms as columns and properties as rows: the
    raw values, and the values min-max scaled to the 0-1 range per property.
    A property whose values are all identical (for example when only one
    organism has data) has no range to scale by and is shown as 0 in the
    normalised panel.

    Args:
        ec_number: EC number of the enzyme, e.g. ``"1.1.1.1"``.
        save_path: File to write the figure to. Nothing is written when this
            is ``None``.
        show_plot: Call ``plt.show()`` when true; otherwise close the figure.

    Returns:
        ``save_path`` when it was given, otherwise a default file name built
        from ``ec_number`` (that default name is only returned, no file is
        written under it). When no data exists, pandas is unavailable or an
        error occurs, ``save_path`` is returned unchanged, which may be
        ``None``.
    """
    validate_dependencies()

    fig = None  # bound once the figure exists so the error path can release it
    try:
        heatmap_data = create_heatmap_data(ec_number)

        if not heatmap_data or not heatmap_data['organisms']:
            print(f"No heatmap data available for EC {ec_number}")
            return save_path

        if not PANDAS_AVAILABLE:
            print("pandas required for heatmap plotting")
            return save_path

        value_columns = ['Avg Km (mM)', 'Optimal pH', 'Optimal Temp (°C)', 'Data Points']

        # Create DataFrame for heatmap
        df = pd.DataFrame({
            'Organism': heatmap_data['organisms'],
            'Avg Km (mM)': heatmap_data['average_km'],
            'Optimal pH': heatmap_data['optimal_ph'],
            'Optimal Temp (°C)': heatmap_data['optimal_temperature'],
            'Data Points': heatmap_data['data_points'],
        })

        # Min-max normalise each property for better visualization.
        df_normalized = df.copy()
        for col in value_columns:
            lowest = df[col].min()
            span = df[col].max() - lowest
            if span == 0:
                # Constant column: dividing by the zero range would turn every
                # cell into NaN. Multiplying the (all-zero) offsets by 0.0
                # yields 0.0 while keeping any missing cells missing.
                df_normalized[col] = (df[col] - lowest) * 0.0
            else:
                df_normalized[col] = (df[col] - lowest) / span

        # Create figure
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
        fig.suptitle(f'Enzyme Properties Heatmap for EC {ec_number}', fontsize=16, fontweight='bold')

        # Plot 1: Raw data heatmap (properties as rows, organisms as columns)
        heatmap_data_raw = df.set_index('Organism')[value_columns].T
        sns.heatmap(heatmap_data_raw, annot=True, fmt='.2f', cmap='viridis', ax=ax1)
        ax1.set_title('Raw Values')

        # Plot 2: Normalized data heatmap
        heatmap_data_norm = df_normalized.set_index('Organism')[value_columns].T
        sns.heatmap(heatmap_data_norm, annot=True, fmt='.2f', cmap='viridis', ax=ax2)
        ax2.set_title('Normalized Values (0-1)')

        plt.tight_layout()

        # Save plot
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Heatmap plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"heatmap_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        print(f"Error plotting heatmap: {e}")
        # Do not leave a half-built figure registered with pyplot; repeated
        # failures in a batch run would otherwise accumulate open figures.
        if fig is not None:
            plt.close(fig)
        return save_path
|
||||||
|
|
||||||
|
|
||||||
|
def generate_summary_plots(ec_number: str, save_dir: str = None) -> List[str]:
    """Generate a comprehensive set of plots for an enzyme.

    Runs, in order: the kinetic-parameter, pH-profile, temperature-profile,
    substrate-specificity and heatmap plots; an organism comparison for three
    model organisms; and a Michaelis-Menten plot for the substrate with the
    most data points (using the first word of its name). Every plot is
    rendered with ``show_plot=False`` and saved as a PNG inside ``save_dir``.
    A failure in one plot is reported and does not stop the others.

    Args:
        ec_number: EC number of the enzyme, e.g. ``"1.1.1.1"``.
        save_dir: Output directory; created (including missing parents) when
            absent. Defaults to ``enzyme_plots_<ec number with underscores>``.

    Returns:
        Paths of the plot files that exist on disk after their plot function
        ran. The plot helpers visible in this module return the requested
        path even when they bail out without saving, so the path alone is not
        treated as proof of success. Note that a file left over from an
        earlier run also satisfies this check.
    """
    validate_dependencies()

    ec_tag = ec_number.replace('.', '_')

    if save_dir is None:
        save_dir = f"enzyme_plots_{ec_tag}"

    # Create save directory; parents=True allows nested targets such as "out/plots".
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    generated_files = []

    def _was_written(result_path) -> bool:
        """Return True when the plot function reported a path that exists."""
        return bool(result_path) and Path(result_path).is_file()

    # Generate all plot types
    plot_functions = [
        ('kinetic_parameters', plot_kinetic_parameters),
        ('ph_profiles', plot_pH_profiles),
        ('temperature_profiles', plot_temperature_profiles),
        ('substrate_specificity', plot_substrate_specificity),
        ('heatmap', plot_heatmap),
    ]

    for plot_name, plot_func in plot_functions:
        try:
            save_path = f"{save_dir}/{plot_name}_{ec_tag}.png"
            result_path = plot_func(ec_number, save_path=save_path, show_plot=False)
            if _was_written(result_path):
                generated_files.append(result_path)
                print(f"Generated {plot_name} plot")
            else:
                print(f"Failed to generate {plot_name} plot")
        except Exception as e:
            print(f"Error generating {plot_name} plot: {e}")

    # Generate organism comparison for common model organisms
    model_organisms = ["Escherichia coli", "Saccharomyces cerevisiae", "Homo sapiens"]
    try:
        save_path = f"{save_dir}/organism_comparison_{ec_tag}.png"
        result_path = plot_organism_comparison(ec_number, model_organisms, save_path=save_path, show_plot=False)
        if _was_written(result_path):
            generated_files.append(result_path)
            print("Generated organism comparison plot")
        else:
            print("Failed to generate organism comparison plot")
    except Exception as e:
        print(f"Error generating organism comparison plot: {e}")

    # Generate Michaelis-Menten plot for most common substrate
    try:
        specificity = get_substrate_specificity(ec_number)
        if specificity:
            most_common = max(specificity, key=lambda x: x.get('data_points', 0))
            substrate_name = most_common['name'].split()[0]  # Take first word
            # A path separator inside the substrate name would otherwise be
            # read as a directory boundary in the output file name.
            file_tag = substrate_name.replace('/', '_').replace('\\', '_')
            save_path = f"{save_dir}/michaelis_menten_{ec_tag}_{file_tag}.png"
            result_path = plot_michaelis_menten(ec_number, substrate_name, save_path=save_path, show_plot=False)
            if _was_written(result_path):
                generated_files.append(result_path)
                print(f"Generated Michaelis-Menten plot for {substrate_name}")
            else:
                print(f"Failed to generate Michaelis-Menten plot for {substrate_name}")
    except Exception as e:
        print(f"Error generating Michaelis-Menten plot: {e}")

    print(f"\nGenerated {len(generated_files)} plots in directory: {save_dir}")
    return generated_files
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Example usage: exercises each plot type for one enzyme.
    # NOTE: steps 1-5 pass show_plot=False without a save_path; the plot
    # functions visible in this module only save when save_path is given, so
    # these calls render and then discard their figures.
    print("BRENDA Visualization Examples")
    print("=" * 40)

    try:
        ec_number = "1.1.1.1"  # Alcohol dehydrogenase

        print(f"\n1. Generating kinetic parameters plot for EC {ec_number}")
        plot_kinetic_parameters(ec_number, show_plot=False)

        print(f"\n2. Generating pH profile plot for EC {ec_number}")
        plot_pH_profiles(ec_number, show_plot=False)

        print(f"\n3. Generating substrate specificity plot for EC {ec_number}")
        plot_substrate_specificity(ec_number, show_plot=False)

        print(f"\n4. Generating Michaelis-Menten plot for EC {ec_number}")
        plot_michaelis_menten(ec_number, substrate="ethanol", show_plot=False)

        print(f"\n5. Generating organism comparison plot for EC {ec_number}")
        organisms = ["Escherichia coli", "Saccharomyces cerevisiae", "Homo sapiens"]
        plot_organism_comparison(ec_number, organisms, show_plot=False)

        print(f"\n6. Generating comprehensive summary plots for EC {ec_number}")
        # generate_summary_plots(ec_number, save_dir=None) takes no show_plot
        # argument (it already renders every plot with show_plot=False);
        # passing one raised TypeError and aborted this step.
        summary_files = generate_summary_plots(ec_number)
        print(f"Generated {len(summary_files)} summary plots")

    except Exception as e:
        print(f"Example failed: {e}")
|
||||||
1053
scientific-skills/brenda-database/scripts/enzyme_pathway_builder.py
Normal file
1053
scientific-skills/brenda-database/scripts/enzyme_pathway_builder.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user