mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-01-26 16:58:56 +08:00
Add BRENDA database skill for enzyme research and analysis
- Add comprehensive BRENDA database skill with API integration
- Include enzyme data retrieval, pathway analysis, and visualization
- Support for enzyme queries, kinetic parameters, and taxonomy data
- Add visualization scripts for enzyme pathways and kinetics
This commit is contained in:
844
scientific-skills/brenda-database/scripts/brenda_queries.py
Normal file
844
scientific-skills/brenda-database/scripts/brenda_queries.py
Normal file
@@ -0,0 +1,844 @@
|
||||
"""
|
||||
BRENDA Database Query Utilities
|
||||
|
||||
This module provides high-level functions for querying and analyzing
|
||||
enzyme data from the BRENDA database using the SOAP API.
|
||||
|
||||
Key features:
|
||||
- Parse BRENDA response data entries
|
||||
- Search for enzymes by substrate/product
|
||||
- Compare enzyme properties across organisms
|
||||
- Retrieve kinetic parameters and environmental conditions
|
||||
- Analyze substrate specificity and inhibition
|
||||
- Support for enzyme engineering and pathway design
|
||||
- Export data in various formats
|
||||
|
||||
Installation:
|
||||
uv pip install zeep requests pandas
|
||||
|
||||
Usage:
|
||||
from scripts.brenda_queries import search_enzymes_by_substrate, compare_across_organisms
|
||||
|
||||
enzymes = search_enzymes_by_substrate("glucose", limit=20)
|
||||
comparison = compare_across_organisms("1.1.1.1", ["E. coli", "S. cerevisiae"])
|
||||
"""
|
||||
|
||||
import re
|
||||
import time
|
||||
import json
|
||||
import csv
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from zeep import Client, Settings
|
||||
from zeep.exceptions import Fault, TransportError
|
||||
ZEEP_AVAILABLE = True
|
||||
except ImportError:
|
||||
print("Warning: zeep not installed. Install with: uv pip install zeep")
|
||||
ZEEP_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import requests
|
||||
REQUESTS_AVAILABLE = True
|
||||
except ImportError:
|
||||
print("Warning: requests not installed. Install with: uv pip install requests")
|
||||
REQUESTS_AVAILABLE = False
|
||||
|
||||
try:
|
||||
import pandas as pd
|
||||
PANDAS_AVAILABLE = True
|
||||
except ImportError:
|
||||
print("Warning: pandas not installed. Install with: uv pip install pandas")
|
||||
PANDAS_AVAILABLE = False
|
||||
|
||||
# Import the brenda_client from the project root
|
||||
import sys
|
||||
sys.path.append(str(Path(__file__).parent.parent.parent.parent))
|
||||
|
||||
try:
|
||||
from brenda_client import get_km_values, get_reactions, call_brenda
|
||||
BRENDA_CLIENT_AVAILABLE = True
|
||||
except ImportError:
|
||||
print("Warning: brenda_client not available")
|
||||
BRENDA_CLIENT_AVAILABLE = False
|
||||
|
||||
|
||||
def validate_dependencies():
    """Ensure the packages needed for BRENDA queries were importable.

    Consults the module-level availability flags for zeep, requests and
    brenda_client (pandas is not checked here) and reports every missing
    one in a single error.

    Raises:
        ImportError: if at least one of the checked dependencies is missing.
    """
    availability = (
        ("zeep", ZEEP_AVAILABLE),
        ("requests", REQUESTS_AVAILABLE),
        ("brenda_client", BRENDA_CLIENT_AVAILABLE),
    )
    absent = [name for name, present in availability if not present]
    if absent:
        raise ImportError(f"Missing required dependencies: {', '.join(absent)}")
|
||||
|
||||
|
||||
def parse_km_entry(entry: str) -> Dict[str, Any]:
    """Parse a BRENDA Km value entry into structured data.

    The entry is split on ``#`` into fields, and every field containing a
    ``*`` is split once on it into a stripped key/value pair.

    Args:
        entry: Raw entry text, e.g.
            ``"ecNumber*1.1.1.1#kmValue*0.5#commentary*pH 7.5, 25°C"``.

    Returns:
        A dict with the raw string fields plus, when they can be extracted,
        these float values:

        * ``km_value_numeric`` - first number found in ``kmValue``
          (typically mM); omitted when that number is negative.
        * ``ph`` - number following "pH" in ``commentary``.
        * ``temperature`` - number followed by "°C" / "C" in ``commentary``.

        An empty dict is returned for empty or non-string input.
    """
    if not entry or not isinstance(entry, str):
        return {}

    parsed: Dict[str, Any] = {}
    for part in entry.split('#'):
        if '*' in part:
            key, value = part.split('*', 1)
            parsed[key.strip()] = value.strip()

    # Extract the first numeric value from kmValue (sign and exponent aware).
    if 'kmValue' in parsed:
        numeric_match = re.search(
            r'(-?)((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)', parsed['kmValue']
        )
        # A Km cannot be negative, so a signed value is not a measurement
        # (presumably a "no value" placeholder such as -999 - verify against
        # the BRENDA docs). Dropping the sign would poison every average.
        if numeric_match and not numeric_match.group(1):
            parsed['km_value_numeric'] = float(numeric_match.group(2))

    if 'commentary' in parsed:
        commentary = parsed['commentary']

        # Require digits on both sides of the decimal point so a sentence
        # ending such as "pH 7.5." is not captured as the unparsable "7.5.".
        ph_match = re.search(r'pH\s*(\d+(?:\.\d+)?)', commentary)
        if ph_match:
            parsed['ph'] = float(ph_match.group(1))

        # Keep decimals ("37.5°C") and refuse a "C" that starts another word
        # or formula ("2 Ca2+", "5 CoA"), which is not a temperature unit.
        temp_match = re.search(
            r'(\d+(?:\.\d+)?)\s*°?\s*C(?![A-Za-z0-9])', commentary
        )
        if temp_match:
            parsed['temperature'] = float(temp_match.group(1))

    return parsed
|
||||
|
||||
|
||||
def parse_reaction_entry(entry: str) -> Dict[str, Any]:
    """Parse a BRENDA reaction entry into structured data.

    The entry is split on ``#`` into fields, and every field containing a
    ``*`` is split once on it into a stripped key/value pair. When a
    ``reaction`` field is present its equation is additionally split into
    compound lists.

    Args:
        entry: Raw entry text, e.g.
            ``"ecNumber*1.1.1.1#reaction*ethanol + NAD+ = acetaldehyde + NADH + H+"``.

    Returns:
        A dict with the raw string fields plus ``reactants`` and ``products``
        (lists of compound names, possibly empty) when ``reaction`` exists.
        An empty dict is returned for empty or non-string input.
    """
    if not entry or not isinstance(entry, str):
        return {}

    parsed: Dict[str, Any] = {}
    for part in entry.split('#'):
        if '*' in part:
            key, value = part.split('*', 1)
            parsed[key.strip()] = value.strip()

    if 'reaction' in parsed:
        reaction = parsed['reaction']

        # Prefer a separator surrounded by whitespace: compound names such as
        # "(1->4)-alpha-D-glucan" contain "->" that is not the reaction arrow.
        sides = re.split(r'\s+(?:<=>|->|=)\s+', reaction, maxsplit=1)
        if len(sides) == 2:
            reactants, products = sides
        else:
            # Fall back to the bare separators for unspaced equations.
            for separator in ('<=>', '->', '='):
                if separator in reaction:
                    reactants, products = reaction.split(separator, 1)
                    break
            else:
                reactants, products = reaction, ''

        def split_compounds(side: str) -> List[str]:
            # Only a whitespace-delimited "+" joins compounds; a bare "+"
            # is a charge sign that belongs to the name ("NAD+", "H+").
            names = (name.strip() for name in re.split(r'\s+\+\s+', side.strip()))
            return [name for name in names if name]

        parsed['reactants'] = split_compounds(reactants)
        parsed['products'] = split_compounds(products)

    return parsed
|
||||
|
||||
|
||||
def extract_organism_data(entry: str) -> Dict[str, Any]:
    """Extract organism-specific information from a BRENDA entry.

    Entries containing the text ``kmValue`` are parsed with
    ``parse_km_entry``; all others with ``parse_reaction_entry``.

    Args:
        entry: Raw BRENDA entry text.

    Returns:
        A dict with the keys ``organism``, ``ec_number``, ``substrate``,
        ``km_value``, ``km_numeric``, ``ph``, ``temperature``, ``commentary``
        and ``literature``. An empty dict is returned when the entry is
        empty, is not a string, or has no ``organism`` field.
    """
    # The parsers already tolerate None / non-string input; guard here as
    # well so the substring test below cannot raise TypeError first.
    if not entry or not isinstance(entry, str):
        return {}

    parsed = parse_km_entry(entry) if 'kmValue' in entry else parse_reaction_entry(entry)

    if 'organism' not in parsed:
        return {}

    return {
        'organism': parsed['organism'],
        'ec_number': parsed.get('ecNumber', ''),
        'substrate': parsed.get('substrate', ''),
        'km_value': parsed.get('kmValue', ''),
        'km_numeric': parsed.get('km_value_numeric', None),
        'ph': parsed.get('ph', None),
        'temperature': parsed.get('temperature', None),
        'commentary': parsed.get('commentary', ''),
        'literature': parsed.get('literature', ''),
    }
|
||||
|
||||
|
||||
def search_enzymes_by_substrate(substrate: str, limit: int = 50) -> List[Dict[str, Any]]:
    """Search for enzymes that act on a specific substrate.

    Requests Km entries for the substrate across all EC numbers (``"*"``)
    and keeps the first entry seen for each (EC number, organism) pair.

    Args:
        substrate: Substrate name passed to ``get_km_values``.
        limit: Maximum number of unique enzymes to return.

    Returns:
        Up to ``limit`` dicts with the keys ``ec_number``, ``organism``,
        ``substrate``, ``km_value``, ``km_numeric`` and ``commentary``.
        Errors are printed and whatever was collected so far is returned.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    unique_enzymes: List[Dict[str, Any]] = []
    seen = set()

    try:
        km_data = get_km_values("*", substrate=substrate)
        time.sleep(0.5)  # Rate limiting

        # De-duplicate while scanning and stop once `limit` unique enzymes
        # are found. Truncating the raw entries first would return fewer
        # than `limit` results whenever the leading entries repeat a pair.
        for entry in km_data:
            if len(unique_enzymes) >= limit:
                break
            parsed = parse_km_entry(entry)
            if not parsed:
                continue
            key = (parsed.get('ecNumber', ''), parsed.get('organism', ''))
            if key in seen:
                continue
            seen.add(key)
            unique_enzymes.append({
                'ec_number': key[0],
                'organism': key[1],
                'substrate': parsed.get('substrate', ''),
                'km_value': parsed.get('kmValue', ''),
                'km_numeric': parsed.get('km_value_numeric', None),
                'commentary': parsed.get('commentary', ''),
            })
    except Exception as e:
        print(f"Error searching enzymes by substrate: {e}")

    return unique_enzymes
|
||||
|
||||
|
||||
def search_enzymes_by_product(product: str, limit: int = 50) -> List[Dict[str, Any]]:
    """Search for enzymes that produce a specific product.

    Requests reactions matching ``*{product}*`` across all EC numbers and
    keeps those whose parsed product list contains the name
    (case-insensitive substring match).

    Args:
        product: Product name to look for.
        limit: Maximum number of matching reactions to return.

    Returns:
        Up to ``limit`` dicts with the keys ``ec_number``, ``organism``,
        ``reaction``, ``reactants``, ``products`` and ``commentary``.
        Errors are printed and whatever was collected so far is returned.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    enzymes: List[Dict[str, Any]] = []
    wanted = product.lower()

    try:
        # This is a simplified approach - in practice you might need
        # more sophisticated pattern matching for products
        reactions = get_reactions("*", reaction=f"*{product}*")
        time.sleep(0.5)  # Rate limiting

        # Filter first, then cap. Capping the raw entries before filtering
        # returns too few results when early entries only mention the
        # compound on the reactant side.
        for entry in reactions:
            if len(enzymes) >= limit:
                break
            parsed = parse_reaction_entry(entry)
            products = parsed.get('products', [])
            if any(wanted in name.lower() for name in products):
                enzymes.append({
                    'ec_number': parsed.get('ecNumber', ''),
                    'organism': parsed.get('organism', ''),
                    'reaction': parsed.get('reaction', ''),
                    'reactants': parsed.get('reactants', []),
                    'products': products,
                    'commentary': parsed.get('commentary', ''),
                })
    except Exception as e:
        print(f"Error searching enzymes by product: {e}")

    return enzymes
|
||||
|
||||
|
||||
def compare_across_organisms(ec_number: str, organisms: List[str]) -> List[Dict[str, Any]]:
    """Summarise Km, pH and temperature data of one enzyme per organism.

    One ``get_km_values`` request is made for every organism, followed by a
    0.5 s pause.

    Args:
        ec_number: EC number of the enzyme.
        organisms: Organism names to compare.

    Returns:
        One dict per organism, in input order. With data it holds
        ``data_points``, ``average_km``, ``min_km``, ``max_km``,
        ``optimal_ph`` and ``optimal_temperature`` (arithmetic means of the
        parsed values) and ``temperature_range``; without data it holds
        ``data_points`` 0 and a ``note``; on failure it holds ``error``.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    def mean_or_none(values):
        return sum(values) / len(values) if values else None

    results = []
    for organism in organisms:
        try:
            km_data = get_km_values(ec_number, organism=organism)
            time.sleep(0.5)  # Rate limiting

            if not km_data:
                results.append({
                    'organism': organism,
                    'ec_number': ec_number,
                    'data_points': 0,
                    'note': 'No data found'
                })
                continue

            records = [parse_km_entry(raw) for raw in km_data]
            km_numbers = [rec['km_value_numeric'] for rec in records if 'km_value_numeric' in rec]
            ph_numbers = [rec['ph'] for rec in records if 'ph' in rec]
            temp_numbers = [rec['temperature'] for rec in records if 'temperature' in rec]

            results.append({
                'organism': organism,
                'ec_number': ec_number,
                'data_points': len(km_data),
                'average_km': mean_or_none(km_numbers),
                'min_km': min(km_numbers) if km_numbers else None,
                'max_km': max(km_numbers) if km_numbers else None,
                'optimal_ph': mean_or_none(ph_numbers),
                'optimal_temperature': mean_or_none(temp_numbers),
                'temperature_range': (min(temp_numbers), max(temp_numbers)) if temp_numbers else None
            })
        except Exception as e:
            print(f"Error comparing organism {organism}: {e}")
            results.append({
                'organism': organism,
                'ec_number': ec_number,
                'error': str(e)
            })

    return results
|
||||
|
||||
|
||||
def get_organisms_for_enzyme(ec_number: str) -> List[str]:
    """Return the sorted, distinct organism names found in an enzyme's Km data.

    Args:
        ec_number: EC number of the enzyme.

    Returns:
        Alphabetically sorted organism names; an empty list when the lookup
        fails (the error is printed).

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        names = {
            record['organism']
            for record in map(parse_km_entry, km_data)
            if 'organism' in record
        }
        return sorted(names)
    except Exception as e:
        print(f"Error getting organisms for enzyme {ec_number}: {e}")
        return []
|
||||
|
||||
|
||||
def get_environmental_parameters(ec_number: str) -> Dict[str, Any]:
    """Collect pH and temperature information from an enzyme's Km entries.

    pH and temperature values come from ``parse_km_entry``. Stability
    ranges are read from commentaries that mention "stable".

    Args:
        ec_number: EC number of the enzyme.

    Returns:
        A dict with ``ec_number``, ``data_points``, ``ph_range``,
        ``optimal_ph`` and ``optimal_temperature`` (arithmetic means),
        ``temperature_range``, ``stability_ph`` and ``temperature_stability``
        (first range found, else None). On failure the dict holds
        ``ec_number`` and ``error``.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    def mean_or_none(values):
        return sum(values) / len(values) if values else None

    def span_or_none(values):
        return (min(values), max(values)) if values else None

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        ph_values = []
        temp_values = []
        ph_stable_ranges = []
        temp_stable_ranges = []

        for raw in km_data:
            record = parse_km_entry(raw)

            if 'ph' in record:
                ph_values.append(record['ph'])
            if 'temperature' in record:
                temp_values.append(record['temperature'])

            # Stability hints are only searched in commentaries saying "stable".
            text = record.get('commentary', '').lower()
            if 'stable' not in text:
                continue

            if 'ph' in text:
                ph_span = re.search(r'ph\s*([\d.]+)\s*[-–]\s*([\d.]+)', text)
                if ph_span:
                    ph_stable_ranges.append((float(ph_span.group(1)), float(ph_span.group(2))))

            if 'temp' in text or '°c' in text:
                temp_span = re.search(r'(\d+)\s*[-–]\s*(\d+)\s*°?c', text)
                if temp_span:
                    temp_stable_ranges.append((int(temp_span.group(1)), int(temp_span.group(2))))

        return {
            'ec_number': ec_number,
            'data_points': len(km_data),
            'ph_range': span_or_none(ph_values),
            'optimal_ph': mean_or_none(ph_values),
            'optimal_temperature': mean_or_none(temp_values),
            'temperature_range': span_or_none(temp_values),
            'stability_ph': ph_stable_ranges[0] if ph_stable_ranges else None,
            'temperature_stability': temp_stable_ranges[0] if temp_stable_ranges else None
        }
    except Exception as e:
        print(f"Error getting environmental parameters for {ec_number}: {e}")
        return {'ec_number': ec_number, 'error': str(e)}
|
||||
|
||||
|
||||
def get_cofactor_requirements(ec_number: str) -> List[Dict[str, Any]]:
    """List known cofactors that appear among an enzyme's reactants.

    Each reactant of each reaction entry is compared, by case-insensitive
    substring match, against a fixed list of common cofactor names. Only
    the first hit per (cofactor name, organism) pair is kept.

    Args:
        ec_number: EC number of the enzyme.

    Returns:
        Dicts with the keys ``name``, ``full_name`` (the matching reactant),
        ``type``, ``organism`` and ``ec_number``. Errors are printed and
        whatever was collected so far is returned.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    known_cofactors = (
        'NAD+', 'NADH', 'NADP+', 'NADPH',
        'ATP', 'ADP', 'AMP',
        'FAD', 'FADH2',
        'CoA', 'acetyl-CoA',
        'pyridoxal phosphate', 'PLP',
        'biotin',
        'heme', 'iron-sulfur',
    )

    found = []
    recorded = set()

    try:
        reactions = get_reactions(ec_number)
        time.sleep(0.5)  # Rate limiting

        for raw in reactions:
            record = parse_reaction_entry(raw)
            if 'reactants' not in record:
                continue

            organism = record.get('organism', '')
            for reactant in record['reactants']:
                reactant_lower = reactant.lower()
                for candidate in known_cofactors:
                    if candidate.lower() not in reactant_lower:
                        continue
                    pair = (candidate, organism)
                    if pair in recorded:
                        continue
                    recorded.add(pair)
                    found.append({
                        'name': candidate,
                        'full_name': reactant,
                        'type': 'oxidoreductase' if 'NAD' in candidate else 'other',
                        'organism': organism,
                        'ec_number': ec_number
                    })
    except Exception as e:
        print(f"Error getting cofactor requirements for {ec_number}: {e}")

    return found
|
||||
|
||||
|
||||
def get_substrate_specificity(ec_number: str) -> List[Dict[str, Any]]:
    """Aggregate an enzyme's Km values per substrate.

    Only entries that carry both a substrate and a numeric Km are used.

    Args:
        ec_number: EC number of the enzyme.

    Returns:
        One dict per substrate with ``name``, ``km`` (mean), ``min_km``,
        ``max_km``, ``data_points``, ``organisms``, and ``vmax``, ``kcat``
        and ``kcat_km_ratio`` (always None here). The list is sorted by
        ascending mean Km, with a zero mean sorted last. On failure the
        error is printed and the list may be empty.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    specificity = []

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        kms_by_substrate = {}
        organisms_by_substrate = {}

        for raw in km_data:
            record = parse_km_entry(raw)
            if 'substrate' not in record or 'km_value_numeric' not in record:
                continue

            name = record['substrate']
            kms_by_substrate.setdefault(name, []).append(record['km_value_numeric'])
            sources = organisms_by_substrate.setdefault(name, set())
            if 'organism' in record:
                sources.add(record['organism'])

        for name, kms in kms_by_substrate.items():
            specificity.append({
                'name': name,
                'km': sum(kms) / len(kms),
                'min_km': min(kms),
                'max_km': max(kms),
                'data_points': len(kms),
                'organisms': list(organisms_by_substrate[name]),
                # No Vmax / kcat values are collected from Km entries.
                'vmax': None,
                'kcat': None,
                'kcat_km_ratio': None  # Would need kcat data to calculate
            })

        # Sort by Km (lower is better affinity)
        specificity.sort(key=lambda row: row['km'] or float('inf'))
    except Exception as e:
        print(f"Error getting substrate specificity for {ec_number}: {e}")

    return specificity
|
||||
|
||||
|
||||
def compare_substrate_affinity(ec_number: str) -> List[Dict[str, Any]]:
    """Return per-substrate Km statistics for an enzyme.

    Alias of ``get_substrate_specificity``: the result is passed through
    unchanged.
    """
    affinity_table = get_substrate_specificity(ec_number)
    return affinity_table
|
||||
|
||||
|
||||
def get_inhibitors(ec_number: str) -> List[Dict[str, Any]]:
    """Guess inhibitors of an enzyme from its Km commentaries.

    A commentary is considered when it contains an inhibition keyword. It
    is then searched for a fixed list of common inhibitor names by
    lowercase substring match, which is only approximate. Only the first
    hit per (inhibitor name, organism) pair is kept.

    Args:
        ec_number: EC number of the enzyme.

    Returns:
        Dicts with the keys ``name``, ``type``, ``organism``, ``ec_number``
        and ``commentary``. Errors are printed and whatever was collected so
        far is returned.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    trigger_words = ('inhibited', 'inhibition', 'blocked', 'prevented', 'reduced')
    known_inhibitors = (
        'iodoacetate', 'n-ethylmaleimide', 'p-chloromercuribenzoate',
        'heavy metals', 'mercury', 'copper', 'zinc',
        'cyanide', 'azide', 'carbon monoxide',
        'edta', 'egta',
    )

    found = []
    recorded = set()

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        for raw in km_data:
            record = parse_km_entry(raw)
            text = record.get('commentary', '').lower()
            if not any(word in text for word in trigger_words):
                continue

            organism = record.get('organism', '')
            for candidate in known_inhibitors:
                if candidate not in text:
                    continue
                pair = (candidate, organism)
                if pair in recorded:
                    continue
                recorded.add(pair)

                if 'iodoacetate' in candidate or 'maleimide' in candidate:
                    kind = 'irreversible'
                else:
                    kind = 'reversible'

                found.append({
                    'name': candidate,
                    'type': kind,
                    'organism': organism,
                    'ec_number': ec_number,
                    'commentary': record.get('commentary', '')
                })
    except Exception as e:
        print(f"Error getting inhibitors for {ec_number}: {e}")

    return found
|
||||
|
||||
|
||||
def get_activators(ec_number: str) -> List[Dict[str, Any]]:
    """Get activator information for an enzyme (from commentary).

    A commentary is considered when it contains an activation keyword. It
    is then searched for a fixed list of common activator names by
    lowercase substring match, which is only approximate. Only the first
    hit per (activator name, organism) pair is kept.

    Args:
        ec_number: EC number of the enzyme.

    Returns:
        Dicts with the keys ``name``, ``type`` ("metal ion",
        "reducing agent" or "other"), ``mechanism`` ("allosteric",
        "cofactor" or "unknown", taken from words in the commentary),
        ``organism``, ``ec_number`` and ``commentary``. Errors are printed
        and whatever was collected so far is returned.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    activator_keywords = ('activated', 'stimulated', 'enhanced', 'increased')
    common_activators = (
        'mg2+', 'mn2+', 'ca2+', 'zn2+',
        'k+', 'na+',
        'phosphate', 'pyrophosphate',
        'dithiothreitol', 'dtt',
        'β-mercaptoethanol',
    )
    reducing_agent_markers = ('dithiothreitol', 'dtt', 'mercapto')

    activators = []

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        for entry in km_data:
            parsed = parse_km_entry(entry)
            commentary = parsed.get('commentary', '').lower()

            if not any(keyword in commentary for keyword in activator_keywords):
                continue

            # The original chained conditional ("a if c else b else d") was
            # a syntax error; the mechanism is now chosen by which word the
            # commentary mentions.
            if 'allosteric' in commentary:
                mechanism = 'allosteric'
            elif 'cofactor' in commentary:
                mechanism = 'cofactor'
            else:
                mechanism = 'unknown'

            for activator in common_activators:
                if activator not in commentary:
                    continue

                if '+' in activator:
                    activator_type = 'metal ion'
                elif any(marker in activator for marker in reducing_agent_markers):
                    activator_type = 'reducing agent'
                else:
                    activator_type = 'other'

                activators.append({
                    'name': activator,
                    'type': activator_type,
                    'mechanism': mechanism,
                    'organism': parsed.get('organism', ''),
                    'ec_number': ec_number,
                    'commentary': parsed.get('commentary', '')
                })
    except Exception as e:
        print(f"Error getting activators for {ec_number}: {e}")

    # Remove duplicates, keeping the first hit per (name, organism).
    unique_activators = []
    seen = set()
    for activator in activators:
        key = (activator['name'], activator['organism'])
        if key not in seen:
            seen.add(key)
            unique_activators.append(activator)

    return unique_activators
|
||||
|
||||
|
||||
def find_thermophilic_homologs(ec_number: str, min_temp: int = 50) -> List[Dict[str, Any]]:
    """Find organisms whose enzyme was measured at high temperature.

    Only organisms whose name contains "therm", "hypertherm" or "pyro" are
    queried. One is reported when the highest temperature parsed from its
    Km entries is at least ``min_temp``.

    Args:
        ec_number: EC number of the enzyme.
        min_temp: Minimum value the highest parsed temperature must reach.

    Returns:
        Dicts with ``organism``, ``ec_number``, ``optimal_temperature``
        (the highest parsed temperature), ``temperature_range``, ``km``
        (mean, or None) and ``data_points``. Errors are printed and whatever
        was collected so far is returned.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    name_hints = ['therm', 'hypertherm', 'pyro']
    hits = []

    try:
        for organism in get_organisms_for_enzyme(ec_number):
            lowered = organism.lower()
            if not any(hint in lowered for hint in name_hints):
                continue

            km_data = get_km_values(ec_number, organism=organism)
            time.sleep(0.2)  # Rate limiting

            records = [parse_km_entry(raw) for raw in km_data]
            temps = [rec['temperature'] for rec in records if 'temperature' in rec]
            km_numbers = [rec['km_value_numeric'] for rec in records if 'km_value_numeric' in rec]

            if not temps or max(temps) < min_temp:
                continue

            hits.append({
                'organism': organism,
                'ec_number': ec_number,
                'optimal_temperature': max(temps),
                'temperature_range': (min(temps), max(temps)),
                'km': sum(km_numbers) / len(km_numbers) if km_numbers else None,
                'data_points': len(km_data)
            })
    except Exception as e:
        print(f"Error finding thermophilic homologs for {ec_number}: {e}")

    return hits
|
||||
|
||||
|
||||
def find_ph_stable_variants(ec_number: str, min_ph: float = 8.0, max_ph: float = 6.0) -> List[Dict[str, Any]]:
    """Find organisms whose enzyme was only measured at alkaline or acidic pH.

    An organism counts as alkaline when its lowest parsed pH is at least
    ``min_ph``, and as acidic when its highest parsed pH is at most
    ``max_ph``. A falsy threshold (None or 0) disables that check.

    Args:
        ec_number: EC number of the enzyme.
        min_ph: Lower bound every pH value must reach for "alkaline".
        max_ph: Upper bound no pH value may exceed for "acidic".

    Returns:
        Dicts with ``organism``, ``ec_number``, ``ph_range``, ``optimal_ph``
        (mean), ``km`` (mean, or None), ``stability_type`` and
        ``data_points``. Errors are printed and whatever was collected so
        far is returned.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    matches = []

    try:
        for organism in get_organisms_for_enzyme(ec_number):
            km_data = get_km_values(ec_number, organism=organism)
            time.sleep(0.2)  # Rate limiting

            records = [parse_km_entry(raw) for raw in km_data]
            ph_numbers = [rec['ph'] for rec in records if 'ph' in rec]
            km_numbers = [rec['km_value_numeric'] for rec in records if 'km_value_numeric' in rec]

            if not ph_numbers:
                continue

            lowest, highest = min(ph_numbers), max(ph_numbers)
            alkaline = min_ph and lowest >= min_ph
            acidic = max_ph and highest <= max_ph
            if not (alkaline or acidic):
                continue

            matches.append({
                'organism': organism,
                'ec_number': ec_number,
                'ph_range': (lowest, highest),
                'optimal_ph': sum(ph_numbers) / len(ph_numbers),
                'km': sum(km_numbers) / len(km_numbers) if km_numbers else None,
                'stability_type': 'alkaline' if alkaline else 'acidic',
                'data_points': len(km_data)
            })
    except Exception as e:
        print(f"Error finding pH-stable variants for {ec_number}: {e}")

    return matches
|
||||
|
||||
|
||||
def get_modeling_parameters(ec_number: str, substrate: Optional[str] = None) -> Dict[str, Any]:
    """Get parameters suitable for kinetic modeling.

    Args:
        ec_number: EC number of the enzyme.
        substrate: Optional substrate name; when given, only Km entries for
            that substrate are requested.

    Returns:
        A dict with ``ec_number``, ``substrate`` ("various" when none was
        given), ``km`` (mean) and ``km_std`` (population standard
        deviation), ``vmax`` and ``kcat`` (means of "vmax = x" / "kcat = x"
        values found in commentaries, else None), ``optimal_ph``,
        ``optimal_temperature``, ``data_points``, ``temperature`` (mean, or
        25.0 by default), ``ph`` (mean, or 7.0 by default), ``enzyme_conc``
        (fixed 1.0) and ``substrate_conc`` (None, left to the caller). When
        no data is found or the lookup fails, the dict holds ``ec_number``
        and ``error``.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    try:
        if substrate:
            km_data = get_km_values(ec_number, substrate=substrate)
        else:
            km_data = get_km_values(ec_number)

        time.sleep(0.5)  # Rate limiting

        if not km_data:
            return {'ec_number': ec_number, 'error': 'No kinetic data found'}

        kms = []
        phs = []
        temperatures = []
        v_max_values = []
        kcat_values = []

        for entry in km_data:
            parsed = parse_km_entry(entry)

            if 'km_value_numeric' in parsed:
                kms.append(parsed['km_value_numeric'])
            if 'ph' in parsed:
                phs.append(parsed['ph'])
            if 'temperature' in parsed:
                temperatures.append(parsed['temperature'])

            # Look for Vmax and kcat in commentary (rare in BRENDA)
            commentary = parsed.get('commentary', '').lower()
            vmax_match = re.search(r'vmax\s*=\s*([\d.]+)', commentary)
            if vmax_match:
                v_max_values.append(float(vmax_match.group(1)))

            kcat_match = re.search(r'kcat\s*=\s*([\d.]+)', commentary)
            if kcat_match:
                kcat_values.append(float(kcat_match.group(1)))

        def mean_or_none(values):
            return sum(values) / len(values) if values else None

        # Compute each mean once; recomputing the Km mean for every element
        # of the deviation sum made km_std quadratic in the number of values.
        km_mean = mean_or_none(kms)
        ph_mean = mean_or_none(phs)
        temperature_mean = mean_or_none(temperatures)
        km_std = (
            (sum((x - km_mean) ** 2 for x in kms) / len(kms)) ** 0.5
            if kms else None
        )

        return {
            'ec_number': ec_number,
            'substrate': substrate if substrate else 'various',
            'km': km_mean,
            'km_std': km_std,
            'vmax': mean_or_none(v_max_values),
            'kcat': mean_or_none(kcat_values),
            'optimal_ph': ph_mean,
            'optimal_temperature': temperature_mean,
            'data_points': len(km_data),
            'temperature': temperature_mean if temperatures else 25.0,  # Default to 25°C
            'ph': ph_mean if phs else 7.0,  # Default to pH 7.0
            'enzyme_conc': 1.0,  # Default enzyme concentration (μM)
            'substrate_conc': None,  # Would be set by user
        }
    except Exception as e:
        return {'ec_number': ec_number, 'error': str(e)}
|
||||
|
||||
|
||||
def export_kinetic_data(ec_number: str, format: str = 'csv', filename: Optional[str] = None) -> str:
    """Export kinetic data to file.

    Args:
        ec_number: EC number of the enzyme.
        format: "csv", "json" or "excel" (case-insensitive).
        filename: Target path. Defaults to
            ``brenda_kinetic_data_<ec number with underscores>.<extension>``,
            where the extension is ``xlsx`` for "excel" and the format name
            otherwise.

    Returns:
        The target filename. It is returned even when nothing was written
        (no data, unsupported format, missing pandas for Excel, or an
        error); the reason is printed in those cases.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    export_format = format.lower()

    if not filename:
        # pandas picks the Excel writer from the file extension and does not
        # recognise ".excel", so map the format name to a real extension.
        extension = 'xlsx' if export_format == 'excel' else format
        filename = f"brenda_kinetic_data_{ec_number.replace('.', '_')}.{extension}"

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        if not km_data:
            print(f"No kinetic data found for EC {ec_number}")
            return filename

        parsed_data = [parsed for parsed in map(parse_km_entry, km_data) if parsed]

        if export_format == 'csv':
            if not parsed_data:
                with open(filename, 'w', newline='') as f:
                    f.write('No data found')
            elif PANDAS_AVAILABLE:
                pd.DataFrame(parsed_data).to_csv(filename, index=False)
            else:
                # pandas is optional for this module: fall back to the csv
                # module, using the union of keys in first-seen order as the
                # header and leaving missing fields empty.
                fieldnames = list(dict.fromkeys(
                    key for row in parsed_data for key in row
                ))
                with open(filename, 'w', newline='', encoding='utf-8') as f:
                    writer = csv.DictWriter(f, fieldnames=fieldnames, restval='')
                    writer.writeheader()
                    writer.writerows(parsed_data)

        elif export_format == 'json':
            with open(filename, 'w') as f:
                json.dump(parsed_data, f, indent=2, default=str)

        elif export_format == 'excel':
            if not PANDAS_AVAILABLE:
                print("pandas required for Excel export")
                return filename
            if not parsed_data:
                print(f"No parsable kinetic data to export for EC {ec_number}")
                return filename
            pd.DataFrame(parsed_data).to_excel(filename, index=False)

        else:
            # Do not claim a successful export for a format nothing handles.
            print(f"Unsupported export format: {format}")
            return filename

        print(f"Exported {len(parsed_data)} entries to {filename}")
        return filename

    except Exception as e:
        print(f"Error exporting data: {e}")
        return filename
|
||||
|
||||
|
||||
def search_by_pattern(pattern: str, limit: int = 50) -> List[Dict[str, Any]]:
    """Find reactions whose equation matches a keyword or pattern.

    The pattern is wrapped in ``*`` wildcards and sent to ``get_reactions``
    for all EC numbers. Only the first ``limit`` returned entries are
    parsed.

    Args:
        pattern: Text to look for in reaction equations.
        limit: Maximum number of entries to parse.

    Returns:
        Dicts with the keys ``ec_number``, ``organism``, ``reaction``,
        ``reactants``, ``products`` and ``commentary``. Errors are printed
        and whatever was collected so far is returned.

    Raises:
        ImportError: if a required dependency is missing.
    """
    validate_dependencies()

    hits = []

    try:
        reactions = get_reactions("*", reaction=f"*{pattern}*")
        time.sleep(0.5)  # Rate limiting

        for raw in reactions[:limit]:
            record = parse_reaction_entry(raw)
            if not record:
                continue
            hits.append({
                'ec_number': record.get('ecNumber', ''),
                'organism': record.get('organism', ''),
                'reaction': record.get('reaction', ''),
                'reactants': record.get('reactants', []),
                'products': record.get('products', []),
                'commentary': record.get('commentary', '')
            })
    except Exception as e:
        print(f"Error searching by pattern '{pattern}': {e}")

    return hits
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo run: three example queries against BRENDA. Any failure is
    # reported once and ends the demo.
    print("BRENDA Database Query Examples")
    print("=" * 40)

    try:
        # Example 1: enzymes acting on glucose
        print("\n1. Searching enzymes for 'glucose':")
        enzymes = search_enzymes_by_substrate("glucose", limit=5)
        for hit in enzymes:
            print(f" EC {hit['ec_number']}: {hit['organism']}")
            print(f" Km: {hit['km_value']}")

        # Example 2: one enzyme compared across three organisms
        print("\n2. Comparing alcohol dehydrogenase (1.1.1.1) across organisms:")
        organisms = ["Escherichia coli", "Saccharomyces cerevisiae", "Homo sapiens"]
        comparison = compare_across_organisms("1.1.1.1", organisms)
        for row in comparison:
            if row.get('data_points', 0) > 0:
                print(f" {row['organism']}:")
                print(f" Avg Km: {row.get('average_km', 'N/A')}")
                print(f" Optimal pH: {row.get('optimal_ph', 'N/A')}")

        # Example 3: pH / temperature summary for the same enzyme
        print("\n3. Environmental parameters for 1.1.1.1:")
        params = get_environmental_parameters("1.1.1.1")
        if params.get('data_points', 0) > 0:
            print(f" pH range: {params.get('ph_range', 'N/A')}")
            print(f" Temperature range: {params.get('temperature_range', 'N/A')}")

    except Exception as e:
        print(f"Example failed: {e}")
|
||||
@@ -0,0 +1,772 @@
|
||||
"""
|
||||
BRENDA Database Visualization Utilities
|
||||
|
||||
This module provides visualization functions for BRENDA enzyme data,
|
||||
including kinetic parameters, environmental conditions, and pathway analysis.
|
||||
|
||||
Key features:
|
||||
- Plot Km, kcat, and Vmax distributions
|
||||
- Compare enzyme properties across organisms
|
||||
- Visualize pH and temperature activity profiles
|
||||
- Plot substrate specificity and affinity data
|
||||
- Generate Michaelis-Menten curves
|
||||
- Create heatmaps and correlation plots
|
||||
- Support for pathway visualization
|
||||
|
||||
Installation:
|
||||
uv pip install matplotlib seaborn pandas numpy
|
||||
|
||||
Usage:
|
||||
from scripts.brenda_visualization import plot_kinetic_parameters, plot_michaelis_menten
|
||||
|
||||
plot_kinetic_parameters("1.1.1.1")
|
||||
plot_michaelis_menten("1.1.1.1", substrate="ethanol")
|
||||
"""
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import pandas as pd
|
||||
PANDAS_AVAILABLE = True
|
||||
except ImportError:
|
||||
print("Warning: pandas not installed. Install with: uv pip install pandas")
|
||||
PANDAS_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from brenda_queries import (
|
||||
get_km_values, get_reactions, parse_km_entry, parse_reaction_entry,
|
||||
compare_across_organisms, get_environmental_parameters,
|
||||
get_substrate_specificity, get_modeling_parameters,
|
||||
search_enzymes_by_substrate, search_by_pattern
|
||||
)
|
||||
BRENDA_QUERIES_AVAILABLE = True
|
||||
except ImportError:
|
||||
print("Warning: brenda_queries not available")
|
||||
BRENDA_QUERIES_AVAILABLE = False
|
||||
|
||||
|
||||
# Set style for plots: select matplotlib's 'default' style sheet and the
# seaborn "husl" palette. These are module-level statements, so they run
# when this module is imported.
|
||||
plt.style.use('default')
|
||||
sns.set_palette("husl")
|
||||
|
||||
|
||||
def validate_dependencies():
    """Raise ImportError when an optional dependency failed to import.

    Consults the module-level flags ``PANDAS_AVAILABLE`` and
    ``BRENDA_QUERIES_AVAILABLE`` (set by the guarded imports at the top of
    this module) and reports every missing dependency in a single error.

    Raises:
        ImportError: If at least one of the flags is false.
    """
    availability = (
        ("pandas", PANDAS_AVAILABLE),
        ("brenda_queries", BRENDA_QUERIES_AVAILABLE),
    )
    unavailable = [label for label, present in availability if not present]
    if unavailable:
        raise ImportError(f"Missing required dependencies: {', '.join(unavailable)}")
|
||||
|
||||
|
||||
def plot_kinetic_parameters(ec_number: str, save_path: str = None, show_plot: bool = True) -> str:
    """Plot Km distributions for an enzyme as a 2x2 figure.

    Panels:
        1. Histogram of every numeric Km value with mean/median markers.
        2. Mean Km per organism (the ten organisms with the highest mean).
        3. Mean Km per substrate (the ten substrates with the highest mean).
        4. Box plot of Km for the five organisms with the most entries.

    Args:
        ec_number: EC number passed to ``get_km_values`` (e.g. "1.1.1.1").
        save_path: If given, the figure is written there at 300 dpi.
        show_plot: If True the figure is shown with ``plt.show()``;
            otherwise it is closed without being displayed.

    Returns:
        ``save_path`` if it was given. If it was not given and plotting
        succeeded, a default file name derived from ``ec_number`` is
        returned even though nothing is written to disk. When no data is
        available or an error occurs, ``save_path`` is returned unchanged
        (possibly ``None``).

    Raises:
        ImportError: From ``validate_dependencies`` when pandas or
            ``brenda_queries`` is unavailable.
    """
    validate_dependencies()

    fig = None
    try:
        km_data = get_km_values(ec_number)

        if not km_data:
            print(f"No kinetic data found for EC {ec_number}")
            return save_path

        # Keep only entries for which the parser produced a numeric Km.
        parsed_entries = [
            parsed
            for parsed in (parse_km_entry(entry) for entry in km_data)
            if 'km_value_numeric' in parsed
        ]

        if not parsed_entries:
            print(f"No numeric Km data found for EC {ec_number}")
            return save_path

        km_values = [entry['km_value_numeric'] for entry in parsed_entries]

        # One frame with every column. Rebinding a per-panel ``df`` (as this
        # function used to) left the box plot grouping on a frame that had
        # no 'Organism' column.
        # pandas is guaranteed here: validate_dependencies() raised otherwise.
        df = pd.DataFrame({
            'Km': km_values,
            'Organism': [entry.get('organism', 'Unknown') for entry in parsed_entries],
            'Substrate': [entry.get('substrate', 'Unknown') for entry in parsed_entries],
        })

        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle(f'Kinetic Parameters for EC {ec_number}', fontsize=16, fontweight='bold')

        # Plot 1: Km distribution histogram
        km_mean = np.mean(km_values)
        km_median = np.median(km_values)
        ax1.hist(km_values, bins=30, alpha=0.7, edgecolor='black')
        ax1.set_xlabel('Km (mM)')
        ax1.set_ylabel('Frequency')
        ax1.set_title('Km Value Distribution')
        ax1.axvline(km_mean, color='red', linestyle='--', label=f'Mean: {km_mean:.2f}')
        ax1.axvline(km_median, color='blue', linestyle='--', label=f'Median: {km_median:.2f}')
        ax1.legend()

        # Plot 2: Km by organism (top 10)
        organism_means = df.groupby('Organism')['Km'].mean().sort_values(ascending=False).head(10)
        organism_means.plot(kind='bar', ax=ax2)
        ax2.set_ylabel('Mean Km (mM)')
        ax2.set_title('Mean Km by Organism (Top 10)')
        ax2.tick_params(axis='x', rotation=45)

        # Plot 3: Km by substrate (top 10)
        substrate_means = df.groupby('Substrate')['Km'].mean().sort_values(ascending=False).head(10)
        substrate_means.plot(kind='bar', ax=ax3)
        ax3.set_ylabel('Mean Km (mM)')
        ax3.set_title('Mean Km by Substrate (Top 10)')
        ax3.tick_params(axis='x', rotation=45)

        # Plot 4: Box plot for the five organisms with the most measurements
        top_organisms = df.groupby('Organism')['Km'].count().sort_values(ascending=False).head(5).index
        top_data = df[df['Organism'].isin(top_organisms)]
        sns.boxplot(data=top_data, x='Organism', y='Km', ax=ax4)
        ax4.set_ylabel('Km (mM)')
        ax4.set_title('Km Distribution by Organism (Top 5)')
        ax4.tick_params(axis='x', rotation=45)

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Kinetic parameters plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"kinetic_parameters_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        # Best-effort API: report the problem, release the half-built figure
        # and hand back the caller's path instead of raising.
        if fig is not None:
            plt.close(fig)
        print(f"Error plotting kinetic parameters: {e}")
        return save_path
|
||||
|
||||
|
||||
def plot_organism_comparison(ec_number: str, organisms: List[str], save_path: str = None, show_plot: bool = True) -> str:
    """Compare enzyme properties across organisms in a 2x2 bar-chart figure.

    Panels show average Km, optimal pH, optimal temperature and the number
    of data points per organism, using the records returned by
    ``compare_across_organisms``. Records whose ``data_points`` is missing
    or zero are ignored. In the first three panels an organism is only
    drawn when it has a truthy value for that property, so each bar is
    always paired with its own organism label.

    Args:
        ec_number: EC number to compare.
        organisms: Organism names forwarded to ``compare_across_organisms``.
        save_path: If given, the figure is written there at 300 dpi.
        show_plot: If True the figure is shown; otherwise it is closed.

    Returns:
        ``save_path`` if given; otherwise, on success, a default file name
        derived from ``ec_number`` (nothing is written in that case). When
        no data is available or an error occurs, ``save_path`` is returned
        unchanged (possibly ``None``).

    Raises:
        ImportError: From ``validate_dependencies`` when a dependency is missing.
    """
    validate_dependencies()

    fig = None
    try:
        comparison = compare_across_organisms(ec_number, organisms)

        if not comparison:
            print(f"No comparison data found for EC {ec_number}")
            return save_path

        # Filter out entries with no data
        valid_data = [c for c in comparison if c.get('data_points', 0) > 0]

        if not valid_data:
            print(f"No valid data for organism comparison of EC {ec_number}")
            return save_path

        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle(f'Organism Comparison for EC {ec_number}', fontsize=16, fontweight='bold')

        # (axis, record key, y label, title) for the three property panels.
        property_panels = (
            (ax1, 'average_km', 'Average Km (mM)', 'Average Km Comparison'),
            (ax2, 'optimal_ph', 'Optimal pH', 'Optimal pH Comparison'),
            (ax3, 'optimal_temperature', 'Optimal Temperature (°C)', 'Optimal Temperature Comparison'),
        )
        for ax, key, ylabel, title in property_panels:
            # Select label and value together: filtering only the values
            # would leave more labels than bars whenever one is missing.
            pairs = [(c['organism'], c[key]) for c in valid_data if c.get(key)]
            if not pairs:
                continue  # leave the panel empty, as before
            labels, values = zip(*pairs)
            ax.bar(labels, values)
            ax.set_ylabel(ylabel)
            ax.set_title(title)
            ax.tick_params(axis='x', rotation=45)

        # Plot 4: Data points comparison (every valid organism has a count)
        names = [c['organism'] for c in valid_data]
        data_points = [c.get('data_points', 0) for c in valid_data]
        ax4.bar(names, data_points)
        ax4.set_ylabel('Number of Data Points')
        ax4.set_title('Available Data Points')
        ax4.tick_params(axis='x', rotation=45)

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Organism comparison plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"organism_comparison_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        # Best-effort API: release the figure, report, return the caller's path.
        if fig is not None:
            plt.close(fig)
        print(f"Error plotting organism comparison: {e}")
        return save_path
|
||||
|
||||
|
||||
def plot_pH_profiles(ec_number: str, save_path: str = None, show_plot: bool = True) -> str:
    """Plot Km against pH, and the pH distribution, for an enzyme.

    The left panel is a scatter of pH versus Km with a linear trend line
    (drawn only when more than two points exist); the right panel is a
    histogram of the pH values with mean/median markers. Only Km entries
    for which ``parse_km_entry`` yields both ``'ph'`` and
    ``'km_value_numeric'`` are used.

    NOTE(review): the fit and the histogram assume ``parsed['ph']`` is
    numeric — confirm against ``parse_km_entry``.

    Args:
        ec_number: EC number passed to ``get_km_values``.
        save_path: If given, the figure is written there at 300 dpi.
        show_plot: If True the figure is shown; otherwise it is closed.

    Returns:
        ``save_path`` if given; otherwise, on success, a default file name
        derived from ``ec_number`` (nothing is written in that case). When
        no data is available or an error occurs, ``save_path`` is returned
        unchanged (possibly ``None``).

    Raises:
        ImportError: From ``validate_dependencies`` when a dependency is missing.
    """
    validate_dependencies()

    fig = None
    try:
        km_data = get_km_values(ec_number)

        if not km_data:
            print(f"No pH data found for EC {ec_number}")
            return save_path

        # Collect (pH, Km) pairs from entries that carry both fields.
        ph_kms = []
        for entry in km_data:
            parsed = parse_km_entry(entry)
            if 'ph' in parsed and 'km_value_numeric' in parsed:
                ph_kms.append((parsed['ph'], parsed['km_value_numeric']))

        if not ph_kms:
            print(f"No pH-Km data found for EC {ec_number}")
            return save_path

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        fig.suptitle(f'pH Activity Profiles for EC {ec_number}', fontsize=16, fontweight='bold')

        ph_values = [ph for ph, _ in ph_kms]
        km_values = [km for _, km in ph_kms]

        # Plot 1: pH vs Km scatter plot
        ax1.scatter(ph_values, km_values, alpha=0.6, s=50)
        ax1.set_xlabel('pH')
        ax1.set_ylabel('Km (mM)')
        ax1.set_title('pH vs Km Values')
        ax1.grid(True, alpha=0.3)

        # Linear trend line
        if len(ph_values) > 2:
            z = np.polyfit(ph_values, km_values, 1)
            p = np.poly1d(z)
            ax1.plot(ph_values, p(ph_values), "r--", alpha=0.8, label=f'Trend: y={z[0]:.3f}x+{z[1]:.3f}')
            ax1.legend()

        # Plot 2: pH distribution histogram
        ph_mean = np.mean(ph_values)
        ph_median = np.median(ph_values)
        ax2.hist(ph_values, bins=20, alpha=0.7, edgecolor='black')
        ax2.set_xlabel('pH')
        ax2.set_ylabel('Frequency')
        ax2.set_title('pH Distribution')
        ax2.axvline(ph_mean, color='red', linestyle='--', label=f'Mean: {ph_mean:.2f}')
        ax2.axvline(ph_median, color='blue', linestyle='--', label=f'Median: {ph_median:.2f}')
        ax2.legend()

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"pH profile plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"ph_profile_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        # Best-effort API: release the figure, report, return the caller's path.
        if fig is not None:
            plt.close(fig)
        print(f"Error plotting pH profiles: {e}")
        return save_path
|
||||
|
||||
|
||||
def plot_temperature_profiles(ec_number: str, save_path: str = None, show_plot: bool = True) -> str:
    """Plot Km against temperature, and the temperature distribution.

    The left panel is a scatter of temperature versus Km. When more than
    two points exist, a quadratic fit is overlaid and the temperature at
    which the fitted curve is lowest (within the observed range) is marked
    and labelled "Optimal". The right panel is a histogram of the
    temperatures with mean/median markers. Only Km entries for which
    ``parse_km_entry`` yields both ``'temperature'`` and
    ``'km_value_numeric'`` are used.

    NOTE(review): the fit assumes ``parsed['temperature']`` is numeric —
    confirm against ``parse_km_entry``.

    Args:
        ec_number: EC number passed to ``get_km_values``.
        save_path: If given, the figure is written there at 300 dpi.
        show_plot: If True the figure is shown; otherwise it is closed.

    Returns:
        ``save_path`` if given; otherwise, on success, a default file name
        derived from ``ec_number`` (nothing is written in that case). When
        no data is available or an error occurs, ``save_path`` is returned
        unchanged (possibly ``None``).

    Raises:
        ImportError: From ``validate_dependencies`` when a dependency is missing.
    """
    validate_dependencies()

    fig = None
    try:
        km_data = get_km_values(ec_number)

        if not km_data:
            print(f"No temperature data found for EC {ec_number}")
            return save_path

        # Collect (temperature, Km) pairs from entries that carry both fields.
        temp_kms = []
        for entry in km_data:
            parsed = parse_km_entry(entry)
            if 'temperature' in parsed and 'km_value_numeric' in parsed:
                temp_kms.append((parsed['temperature'], parsed['km_value_numeric']))

        if not temp_kms:
            print(f"No temperature-Km data found for EC {ec_number}")
            return save_path

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        fig.suptitle(f'Temperature Activity Profiles for EC {ec_number}', fontsize=16, fontweight='bold')

        temp_values = [temp for temp, _ in temp_kms]
        km_values = [km for _, km in temp_kms]

        # Plot 1: Temperature vs Km scatter plot
        ax1.scatter(temp_values, km_values, alpha=0.6, s=50)
        ax1.set_xlabel('Temperature (°C)')
        ax1.set_ylabel('Km (mM)')
        ax1.set_title('Temperature vs Km Values')
        ax1.grid(True, alpha=0.3)

        if len(temp_values) > 2:
            # Quadratic fit evaluated on a dense grid over the observed range.
            z = np.polyfit(temp_values, km_values, 2)
            p = np.poly1d(z)
            x_smooth = np.linspace(min(temp_values), max(temp_values), 100)
            fitted = p(x_smooth)
            ax1.plot(x_smooth, fitted, "r--", alpha=0.8, label='Polynomial fit')

            # The grid point where the fitted Km is smallest is what gets
            # labelled as the optimum.
            optimum_temp = x_smooth[np.argmin(fitted)]
            ax1.axvline(optimum_temp, color='green', linestyle=':', label=f'Optimal: {optimum_temp:.1f}°C')
            ax1.legend()

        # Plot 2: Temperature distribution histogram
        temp_mean = np.mean(temp_values)
        temp_median = np.median(temp_values)
        ax2.hist(temp_values, bins=20, alpha=0.7, edgecolor='black')
        ax2.set_xlabel('Temperature (°C)')
        ax2.set_ylabel('Frequency')
        ax2.set_title('Temperature Distribution')
        ax2.axvline(temp_mean, color='red', linestyle='--', label=f'Mean: {temp_mean:.1f}°C')
        ax2.axvline(temp_median, color='blue', linestyle='--', label=f'Median: {temp_median:.1f}°C')
        ax2.legend()

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Temperature profile plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"temperature_profile_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        # Best-effort API: release the figure, report, return the caller's path.
        if fig is not None:
            plt.close(fig)
        print(f"Error plotting temperature profiles: {e}")
        return save_path
|
||||
|
||||
|
||||
def plot_substrate_specificity(ec_number: str, save_path: str = None, show_plot: bool = True) -> str:
    """Plot substrate affinity (Km) and data availability for an enzyme.

    Uses the records returned by ``get_substrate_specificity``; each record
    is read through its ``'name'``, ``'km'`` and ``'data_points'`` keys.
    Only records with a truthy ``'km'`` are plotted, so names, Km values
    and data-point counts always stay aligned.

    Panels:
        1. Km of the 15 substrates with the most data points, sorted so the
           lowest Km is at the top.
        2. Number of data points for those same substrates.
        3. Histogram of all Km values with mean/median markers.
        4. Scatter of data points versus Km for all plotted substrates.

    Args:
        ec_number: EC number passed to ``get_substrate_specificity``.
        save_path: If given, the figure is written there at 300 dpi.
        show_plot: If True the figure is shown; otherwise it is closed.

    Returns:
        ``save_path`` if given; otherwise, on success, a default file name
        derived from ``ec_number`` (nothing is written in that case). When
        no usable data is available or an error occurs, ``save_path`` is
        returned unchanged (possibly ``None``).

    Raises:
        ImportError: From ``validate_dependencies`` when a dependency is missing.
    """
    validate_dependencies()

    fig = None
    try:
        specificity = get_substrate_specificity(ec_number)

        if not specificity:
            print(f"No substrate specificity data found for EC {ec_number}")
            return save_path

        # Filter whole records, not just the Km list: the three columns
        # below must have equal length for the DataFrame constructor.
        with_km = [s for s in specificity if s.get('km')]

        if not with_km:
            # Nothing to draw; avoid saving an empty figure.
            print(f"No substrate Km data found for EC {ec_number}")
            return save_path

        kms = [s['km'] for s in with_km]
        df = pd.DataFrame({
            'Substrate': [s['name'] for s in with_km],
            'Km': kms,
            'DataPoints': [s['data_points'] for s in with_km],
        })

        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle(f'Substrate Specificity for EC {ec_number}', fontsize=16, fontweight='bold')

        # Top 15 substrates by data points, then ordered by Km.
        top_sorted = df.nlargest(15, 'DataPoints').sort_values('Km')
        positions = range(len(top_sorted))
        # Long substrate names are shortened to keep tick labels readable.
        tick_labels = [s[:30] + '...' if len(s) > 30 else s for s in top_sorted['Substrate']]

        # Plot 1: Km values for top substrates (sorted by affinity)
        ax1.barh(positions, top_sorted['Km'])
        ax1.set_yticks(positions)
        ax1.set_yticklabels(tick_labels)
        ax1.set_xlabel('Km (mM)')
        ax1.set_title('Substrate Affinity (Lower Km = Higher Affinity)')
        ax1.invert_yaxis()  # Best affinity at top

        # Plot 2: Data points by substrate
        ax2.barh(positions, top_sorted['DataPoints'])
        ax2.set_yticks(positions)
        ax2.set_yticklabels(tick_labels)
        ax2.set_xlabel('Number of Data Points')
        ax2.set_title('Data Availability by Substrate')
        ax2.invert_yaxis()

        # Plot 3: Km distribution
        km_mean = np.mean(kms)
        km_median = np.median(kms)
        ax3.hist(kms, bins=20, alpha=0.7, edgecolor='black')
        ax3.set_xlabel('Km (mM)')
        ax3.set_ylabel('Frequency')
        ax3.set_title('Km Value Distribution')
        ax3.axvline(km_mean, color='red', linestyle='--', label=f'Mean: {km_mean:.2f}')
        ax3.axvline(km_median, color='blue', linestyle='--', label=f'Median: {km_median:.2f}')
        ax3.legend()

        # Plot 4: Km vs Data Points scatter
        ax4.scatter(df['DataPoints'], df['Km'], alpha=0.6)
        ax4.set_xlabel('Number of Data Points')
        ax4.set_ylabel('Km (mM)')
        ax4.set_title('Km vs Data Points')
        ax4.grid(True, alpha=0.3)

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Substrate specificity plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"substrate_specificity_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        # Best-effort API: release the figure, report, return the caller's path.
        if fig is not None:
            plt.close(fig)
        print(f"Error plotting substrate specificity: {e}")
        return save_path
|
||||
|
||||
|
||||
def plot_michaelis_menten(ec_number: str, substrate: str = None, save_path: str = None, show_plot: bool = True) -> str:
    """Draw a Michaelis-Menten curve and its Lineweaver-Burk transform.

    Parameters come from ``get_modeling_parameters(ec_number, substrate)``,
    read through the keys ``'km'``, ``'vmax'``, ``'kcat'`` and
    ``'enzyme_conc'`` (default 1.0). The Vmax used for the curve is, in
    order of preference: the reported ``vmax``; ``kcat * enzyme_conc``;
    or 1.0 (normalised curve). Rates are evaluated as
    ``Vmax * [S] / (Km + [S])`` for [S] from 0 to 5 * Km.

    The left panel marks Km and the half-maximal rate Vmax / 2, which is
    by definition the rate at [S] = Km. The right panel shows 1/rate
    against 1/[S] with a fitted straight line.

    Args:
        ec_number: EC number to model.
        substrate: Optional substrate name forwarded to
            ``get_modeling_parameters`` and used in the title/default name.
        save_path: If given, the figure is written there at 300 dpi.
        show_plot: If True the figure is shown; otherwise it is closed.

    Returns:
        ``save_path`` if given; otherwise, on success, a default file name
        derived from ``ec_number`` and ``substrate`` (nothing is written in
        that case). When no usable data is available or an error occurs,
        ``save_path`` is returned unchanged (possibly ``None``).

    Raises:
        ImportError: From ``validate_dependencies`` when a dependency is missing.
    """
    validate_dependencies()

    fig = None
    try:
        model_data = get_modeling_parameters(ec_number, substrate)

        if not model_data or model_data.get('error'):
            print(f"No modeling data found for EC {ec_number}")
            return save_path

        km = model_data.get('km')
        vmax = model_data.get('vmax')
        kcat = model_data.get('kcat')
        enzyme_conc = model_data.get('enzyme_conc', 1.0)

        if not km:
            print(f"No Km data available for plotting")
            return save_path

        # Resolve the Vmax once so the curve, the half-maximum line and the
        # Km marker all agree.
        if vmax:
            effective_vmax = vmax
        elif kcat and enzyme_conc:
            effective_vmax = kcat * enzyme_conc
        else:
            effective_vmax = 1.0  # normalised curve

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
        fig.suptitle(f'Michaelis-Menten Kinetics for EC {ec_number}' + (f' - {substrate}' if substrate else ''),
                     fontsize=16, fontweight='bold')

        substrate_range = np.linspace(0, km * 5, 1000)
        rates = (effective_vmax * substrate_range) / (km + substrate_range)

        # Plot 1: Michaelis-Menten curve
        # The rate at [S] = Km is exactly Vmax / 2. Using half of the last
        # sampled rate instead would understate it, because at 5 * Km the
        # curve has only reached 5/6 of Vmax.
        half_vmax = effective_vmax * 0.5
        ax1.plot(substrate_range, rates, 'b-', linewidth=2, label='Michaelis-Menten')
        ax1.axhline(y=half_vmax, color='r', linestyle='--', alpha=0.7, label='0.5 × Vmax')
        ax1.axvline(x=km, color='g', linestyle='--', alpha=0.7, label=f'Km = {km:.2f}')
        ax1.set_xlabel('Substrate Concentration (mM)')
        ax1.set_ylabel('Reaction Rate')
        ax1.set_title('Michaelis-Menten Curve')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Marker at (Km, Vmax / 2), computed analytically rather than by
        # searching the sampled grid (Km is generally not a grid point).
        ax1.plot(km, half_vmax, 'ro', markersize=8)

        # Plot 2: Lineweaver-Burk plot (double reciprocal); skip [S] = 0.
        nonzero = substrate_range > 0
        reciprocal_substrate = 1 / substrate_range[nonzero]
        reciprocal_rate = 1 / rates[nonzero]

        ax2.scatter(reciprocal_substrate, reciprocal_rate, alpha=0.6, s=10)

        # Straight-line fit; the intercept z[1] is reported as 1/Vmax.
        z = np.polyfit(reciprocal_substrate, reciprocal_rate, 1)
        p = np.poly1d(z)
        x_fit = np.linspace(min(reciprocal_substrate), max(reciprocal_substrate), 100)
        ax2.plot(x_fit, p(x_fit), 'r-', linewidth=2, label=f'1/Vmax = {z[1]:.3f}')

        ax2.set_xlabel('1/[Substrate] (1/mM)')
        ax2.set_ylabel('1/Rate')
        ax2.set_title('Lineweaver-Burk Plot')
        ax2.legend()
        ax2.grid(True, alpha=0.3)

        # Parameter summary box in the figure's top-left corner
        info_text = f"Km = {km:.3f} mM"
        if vmax:
            info_text += f"\nVmax = {vmax:.3f}"
        if kcat:
            info_text += f"\nkcat = {kcat:.3f} s⁻¹"
        if enzyme_conc:
            info_text += f"\n[Enzyme] = {enzyme_conc:.3f} μM"

        fig.text(0.02, 0.98, info_text, transform=fig.transFigure,
                 fontsize=10, verticalalignment='top',
                 bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Michaelis-Menten plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"michaelis_menten_{ec_number.replace('.', '_')}_{substrate or 'all'}.png"

    except Exception as e:
        # Best-effort API: release the figure, report, return the caller's path.
        if fig is not None:
            plt.close(fig)
        print(f"Error plotting Michaelis-Menten: {e}")
        return save_path
|
||||
|
||||
|
||||
def create_heatmap_data(ec_number: str, parameters: List[str] = None) -> Dict[str, Any]:
    """Collect per-organism enzyme properties as parallel lists.

    Calls ``compare_across_organisms`` for a fixed list of six organisms
    and keeps the records whose ``data_points`` is greater than zero.

    Args:
        ec_number: EC number to look up.
        parameters: Accepted but not used by this function.

    Returns:
        A dict with the keys ``'organisms'``, ``'average_km'``,
        ``'optimal_ph'``, ``'optimal_temperature'`` and ``'data_points'``,
        each mapping to a list with one item per kept organism (missing
        properties default to 0). ``None`` is returned when the comparison
        yields nothing or an error occurs.

    Raises:
        ImportError: From ``validate_dependencies`` when a dependency is missing.
    """
    validate_dependencies()

    try:
        reference_organisms = [
            "Escherichia coli", "Saccharomyces cerevisiae", "Bacillus subtilis",
            "Homo sapiens", "Mus musculus", "Rattus norvegicus",
        ]
        records = compare_across_organisms(ec_number, reference_organisms)

        if not records:
            return None

        # Only organisms that actually have measurements contribute a column.
        populated = [record for record in records if record.get('data_points', 0) > 0]

        return {
            'organisms': [record['organism'] for record in populated],
            'average_km': [record.get('average_km', 0) for record in populated],
            'optimal_ph': [record.get('optimal_ph', 0) for record in populated],
            'optimal_temperature': [record.get('optimal_temperature', 0) for record in populated],
            'data_points': [record.get('data_points', 0) for record in populated],
        }

    except Exception as e:
        print(f"Error creating heatmap data: {e}")
        return None
|
||||
|
||||
|
||||
def plot_heatmap(ec_number: str, save_path: str = None, show_plot: bool = True) -> str:
    """Draw raw and min-max-normalised heatmaps of enzyme properties.

    Data comes from ``create_heatmap_data``. Rows are average Km, optimal
    pH, optimal temperature and number of data points; columns are
    organisms. The left panel shows the raw values, the right panel shows
    each property rescaled to the 0-1 range across organisms. A property
    with the same value for every organism (always the case when only one
    organism has data) is shown as 0.0 in the normalised panel instead of
    dividing by a zero range.

    Args:
        ec_number: EC number to plot.
        save_path: If given, the figure is written there at 300 dpi.
        show_plot: If True the figure is shown; otherwise it is closed.

    Returns:
        ``save_path`` if given; otherwise, on success, a default file name
        derived from ``ec_number`` (nothing is written in that case). When
        no data is available or an error occurs, ``save_path`` is returned
        unchanged (possibly ``None``).

    Raises:
        ImportError: From ``validate_dependencies`` when a dependency is missing.
    """
    validate_dependencies()

    fig = None
    try:
        heatmap_data = create_heatmap_data(ec_number)

        if not heatmap_data or not heatmap_data['organisms']:
            print(f"No heatmap data available for EC {ec_number}")
            return save_path

        # pandas is guaranteed here: validate_dependencies() raised otherwise.
        value_columns = ['Avg Km (mM)', 'Optimal pH', 'Optimal Temp (°C)', 'Data Points']
        df = pd.DataFrame({
            'Organism': heatmap_data['organisms'],
            'Avg Km (mM)': heatmap_data['average_km'],
            'Optimal pH': heatmap_data['optimal_ph'],
            'Optimal Temp (°C)': heatmap_data['optimal_temperature'],
            'Data Points': heatmap_data['data_points']
        })

        # Min-max normalise each property across organisms.
        df_normalized = df.copy()
        for col in value_columns:
            lowest = df[col].min()
            span = df[col].max() - lowest
            # A zero span would turn the whole row into NaN (0 / 0).
            df_normalized[col] = (df[col] - lowest) / span if span else 0.0

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))
        fig.suptitle(f'Enzyme Properties Heatmap for EC {ec_number}', fontsize=16, fontweight='bold')

        # Plot 1: Raw data heatmap (properties as rows, organisms as columns)
        heatmap_data_raw = df.set_index('Organism')[value_columns].T
        sns.heatmap(heatmap_data_raw, annot=True, fmt='.2f', cmap='viridis', ax=ax1)
        ax1.set_title('Raw Values')

        # Plot 2: Normalized data heatmap
        heatmap_data_norm = df_normalized.set_index('Organism')[value_columns].T
        sns.heatmap(heatmap_data_norm, annot=True, fmt='.2f', cmap='viridis', ax=ax2)
        ax2.set_title('Normalized Values (0-1)')

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Heatmap plot saved to {save_path}")

        if show_plot:
            plt.show()
        else:
            plt.close()

        return save_path or f"heatmap_{ec_number.replace('.', '_')}.png"

    except Exception as e:
        # Best-effort API: release the figure, report, return the caller's path.
        if fig is not None:
            plt.close(fig)
        print(f"Error plotting heatmap: {e}")
        return save_path
|
||||
|
||||
|
||||
def generate_summary_plots(ec_number: str, save_dir: str = None) -> List[str]:
    """Generate the full set of plots for an enzyme into one directory.

    Runs, in order: kinetic parameters, pH profiles, temperature profiles,
    substrate specificity, heatmap, an organism comparison for three model
    organisms, and a Michaelis-Menten plot for the substrate with the most
    data points (first word of its name). Each plot is attempted
    independently; a failure is reported and the remaining plots still run.

    The plotting helpers hand back the requested path even when they could
    not draw anything, so a plot is only counted as generated when its
    file actually exists on disk afterwards.

    Args:
        ec_number: EC number to plot.
        save_dir: Output directory; defaults to
            ``enzyme_plots_<ec number with dots replaced by underscores>``.
            It is created (including missing parents) if necessary.

    Returns:
        Paths of the plot files that exist after the run.

    Raises:
        ImportError: From ``validate_dependencies`` when a dependency is missing.
    """
    validate_dependencies()

    if save_dir is None:
        save_dir = f"enzyme_plots_{ec_number.replace('.', '_')}"

    # parents=True so a nested output directory does not fail.
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    generated_files = []

    def _record(result_path) -> bool:
        """Append result_path if it names an existing file; report success."""
        if result_path and Path(result_path).is_file():
            generated_files.append(result_path)
            return True
        return False

    # Plot types that only need the EC number
    plot_functions = [
        ('kinetic_parameters', plot_kinetic_parameters),
        ('ph_profiles', plot_pH_profiles),
        ('temperature_profiles', plot_temperature_profiles),
        ('substrate_specificity', plot_substrate_specificity),
        ('heatmap', plot_heatmap),
    ]

    for plot_name, plot_func in plot_functions:
        try:
            save_path = f"{save_dir}/{plot_name}_{ec_number.replace('.', '_')}.png"
            if _record(plot_func(ec_number, save_path=save_path, show_plot=False)):
                print(f"Generated {plot_name} plot")
            else:
                print(f"Failed to generate {plot_name} plot")
        except Exception as e:
            print(f"Error generating {plot_name} plot: {e}")

    # Organism comparison for common model organisms
    model_organisms = ["Escherichia coli", "Saccharomyces cerevisiae", "Homo sapiens"]
    try:
        save_path = f"{save_dir}/organism_comparison_{ec_number.replace('.', '_')}.png"
        if _record(plot_organism_comparison(ec_number, model_organisms, save_path=save_path, show_plot=False)):
            print("Generated organism comparison plot")
    except Exception as e:
        print(f"Error generating organism comparison plot: {e}")

    # Michaelis-Menten plot for the substrate with the most data points
    try:
        specificity = get_substrate_specificity(ec_number)
        if specificity:
            most_common = max(specificity, key=lambda x: x.get('data_points', 0))
            substrate_name = most_common['name'].split()[0]  # Take first word
            save_path = f"{save_dir}/michaelis_menten_{ec_number.replace('.', '_')}_{substrate_name}.png"
            if _record(plot_michaelis_menten(ec_number, substrate_name, save_path=save_path, show_plot=False)):
                print(f"Generated Michaelis-Menten plot for {substrate_name}")
    except Exception as e:
        print(f"Error generating Michaelis-Menten plot: {e}")

    print(f"\nGenerated {len(generated_files)} plots in directory: {save_dir}")
    return generated_files
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Example usage: build each plot type for one enzyme without opening
    # any windows (show_plot=False).
    print("BRENDA Visualization Examples")
    print("=" * 40)

    try:
        ec_number = "1.1.1.1"  # Alcohol dehydrogenase

        print(f"\n1. Generating kinetic parameters plot for EC {ec_number}")
        plot_kinetic_parameters(ec_number, show_plot=False)

        print(f"\n2. Generating pH profile plot for EC {ec_number}")
        plot_pH_profiles(ec_number, show_plot=False)

        print(f"\n3. Generating substrate specificity plot for EC {ec_number}")
        plot_substrate_specificity(ec_number, show_plot=False)

        print(f"\n4. Generating Michaelis-Menten plot for EC {ec_number}")
        plot_michaelis_menten(ec_number, substrate="ethanol", show_plot=False)

        print(f"\n5. Generating organism comparison plot for EC {ec_number}")
        organisms = ["Escherichia coli", "Saccharomyces cerevisiae", "Homo sapiens"]
        plot_organism_comparison(ec_number, organisms, show_plot=False)

        print(f"\n6. Generating comprehensive summary plots for EC {ec_number}")
        # generate_summary_plots takes no show_plot argument; it already
        # passes show_plot=False to every plot it creates.
        summary_files = generate_summary_plots(ec_number)
        print(f"Generated {len(summary_files)} summary plots")

    except Exception as e:
        print(f"Example failed: {e}")
|
||||
1053
scientific-skills/brenda-database/scripts/enzyme_pathway_builder.py
Normal file
1053
scientific-skills/brenda-database/scripts/enzyme_pathway_builder.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user