mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-01-26 16:58:56 +08:00
- Add comprehensive BRENDA database skill with API integration
- Include enzyme data retrieval, pathway analysis, and visualization
- Support for enzyme queries, kinetic parameters, and taxonomy data
- Add visualization scripts for enzyme pathways and kinetics
844 lines
30 KiB
Python
844 lines
30 KiB
Python
"""
|
||
BRENDA Database Query Utilities
|
||
|
||
This module provides high-level functions for querying and analyzing
|
||
enzyme data from the BRENDA database using the SOAP API.
|
||
|
||
Key features:
|
||
- Parse BRENDA response data entries
|
||
- Search for enzymes by substrate/product
|
||
- Compare enzyme properties across organisms
|
||
- Retrieve kinetic parameters and environmental conditions
|
||
- Analyze substrate specificity and inhibition
|
||
- Support for enzyme engineering and pathway design
|
||
- Export data in various formats
|
||
|
||
Installation:
|
||
uv pip install zeep requests pandas
|
||
|
||
Usage:
|
||
from scripts.brenda_queries import search_enzymes_by_substrate, compare_across_organisms
|
||
|
||
enzymes = search_enzymes_by_substrate("glucose", limit=20)
|
||
comparison = compare_across_organisms("1.1.1.1", ["E. coli", "S. cerevisiae"])
|
||
"""
|
||
|
||
import re
|
||
import time
|
||
import json
|
||
import csv
|
||
from typing import List, Dict, Any, Optional, Tuple
|
||
from pathlib import Path
|
||
|
||
try:
|
||
from zeep import Client, Settings
|
||
from zeep.exceptions import Fault, TransportError
|
||
ZEEP_AVAILABLE = True
|
||
except ImportError:
|
||
print("Warning: zeep not installed. Install with: uv pip install zeep")
|
||
ZEEP_AVAILABLE = False
|
||
|
||
try:
|
||
import requests
|
||
REQUESTS_AVAILABLE = True
|
||
except ImportError:
|
||
print("Warning: requests not installed. Install with: uv pip install requests")
|
||
REQUESTS_AVAILABLE = False
|
||
|
||
try:
|
||
import pandas as pd
|
||
PANDAS_AVAILABLE = True
|
||
except ImportError:
|
||
print("Warning: pandas not installed. Install with: uv pip install pandas")
|
||
PANDAS_AVAILABLE = False
|
||
|
||
# Import the brenda_client from the project root
|
||
import sys
|
||
sys.path.append(str(Path(__file__).parent.parent.parent.parent))
|
||
|
||
try:
|
||
from brenda_client import get_km_values, get_reactions, call_brenda
|
||
BRENDA_CLIENT_AVAILABLE = True
|
||
except ImportError:
|
||
print("Warning: brenda_client not available")
|
||
BRENDA_CLIENT_AVAILABLE = False
|
||
|
||
|
||
def validate_dependencies():
    """Raise ImportError if a required runtime dependency failed to import.

    Consults the module-level availability flags for zeep, requests and
    brenda_client. The pandas flag is not consulted here.

    Raises:
        ImportError: A single error naming every missing dependency.
    """
    availability = (
        ("zeep", ZEEP_AVAILABLE),
        ("requests", REQUESTS_AVAILABLE),
        ("brenda_client", BRENDA_CLIENT_AVAILABLE),
    )
    missing = [name for name, available in availability if not available]

    if missing:
        raise ImportError(f"Missing required dependencies: {', '.join(missing)}")
|
||
|
||
|
||
def parse_km_entry(entry: str) -> Dict[str, Any]:
    """Parse a BRENDA Km value entry into structured data.

    The entry is treated as '#'-separated fields, each of the form
    ``key*value``. Every field becomes a key in the result; fields without a
    '*' are ignored. Three derived keys are added when they can be extracted:

    - ``km_value_numeric``: first number found in the ``kmValue`` field.
    - ``ph``: first number following "pH" in the ``commentary`` field.
    - ``temperature``: first number followed by "C" / "°C" in ``commentary``.

    Args:
        entry: Raw entry string.

    Returns:
        Dict of parsed fields, or an empty dict when ``entry`` is empty or
        not a string.
    """
    if not entry or not isinstance(entry, str):
        return {}

    parsed: Dict[str, Any] = {}
    for part in entry.split('#'):
        if '*' in part:
            key, value = part.split('*', 1)
            parsed[key.strip()] = value.strip()

    # Extract the first numeric value from kmValue.
    # NOTE(review): the sign is ignored, so a negative placeholder such as
    # "-999" would be read as 999.0 -- confirm how the API encodes missing
    # values before relying on this number.
    if 'kmValue' in parsed:
        numeric_match = re.search(r'(\d+\.?\d*)', parsed['kmValue'])
        if numeric_match:
            parsed['km_value_numeric'] = float(numeric_match.group(1))

    if 'commentary' in parsed:
        commentary = parsed['commentary']

        # Match a plain decimal rather than the character class [0-9.]+ so a
        # sentence-ending period ("pH 7.5.") is not captured, which would
        # make float() raise ValueError.
        ph_match = re.search(r'pH\s*(\d+(?:\.\d+)?)', commentary)
        if ph_match:
            parsed['ph'] = float(ph_match.group(1))

        # Accept decimal temperatures ("37.5°C" must not be read as 5) and
        # require that the "C" is not the first letter of a longer token, so
        # that "10 CoA" or "5 Ca2+" is not mistaken for a temperature.
        temp_match = re.search(r'(\d+(?:\.\d+)?)\s*°?C(?![A-Za-z0-9])', commentary)
        if temp_match:
            parsed['temperature'] = float(temp_match.group(1))

    return parsed
|
||
|
||
|
||
def parse_reaction_entry(entry: str) -> Dict[str, Any]:
    """Parse a BRENDA reaction entry into structured data.

    The entry is treated as '#'-separated fields, each of the form
    ``key*value``. When a ``reaction`` field is present, the equation is
    split into two sides and each side into compounds, stored under the
    ``reactants`` and ``products`` keys.

    Args:
        entry: Raw entry string.

    Returns:
        Dict of parsed fields, or an empty dict when ``entry`` is empty or
        not a string. ``products`` is an empty list when the equation has no
        recognizable separator.
    """
    if not entry or not isinstance(entry, str):
        return {}

    parsed: Dict[str, Any] = {}
    for part in entry.split('#'):
        if '*' in part:
            key, value = part.split('*', 1)
            parsed[key.strip()] = value.strip()

    if 'reaction' in parsed:
        reaction = parsed['reaction']

        # Prefer a separator surrounded by whitespace: compound names can
        # themselves contain "->" (e.g. "(1->4)-alpha-D-glucan"), which must
        # not be taken for the reaction arrow.
        sides = re.split(r'\s+(?:<=>|->|=)\s+', reaction, maxsplit=1)
        if len(sides) == 1:
            # Fall back to the bare separators for equations written
            # without spaces, e.g. "A->B".
            for separator in ('<=>', '->', '='):
                if separator in reaction:
                    sides = reaction.split(separator, 1)
                    break

        reactants = sides[0]
        products = sides[1] if len(sides) > 1 else ''

        # Split compounds only on a whitespace-delimited '+', so that a
        # charge or cofactor suffix ("NAD+", "H+") stays attached to its
        # compound. Empty fragments are dropped.
        compound_separator = re.compile(r'\s+\+\s+')
        parsed['reactants'] = [
            name.strip() for name in compound_separator.split(reactants) if name.strip()
        ]
        parsed['products'] = [
            name.strip() for name in compound_separator.split(products) if name.strip()
        ]

    return parsed
|
||
|
||
|
||
def extract_organism_data(entry: str) -> Dict[str, Any]:
    """Extract organism-specific information from a BRENDA entry.

    Entries containing the text 'kmValue' are parsed with parse_km_entry,
    all others with parse_reaction_entry.

    Args:
        entry: Raw entry string.

    Returns:
        A dict with the keys organism, ec_number, substrate, km_value,
        km_numeric, ph, temperature, commentary and literature when the
        parsed entry has an 'organism' field; otherwise an empty dict.
        Empty or non-string input also yields an empty dict.
    """
    # Guard first: the substring test below raises TypeError on None and
    # other non-string values.
    if not entry or not isinstance(entry, str):
        return {}

    parsed = parse_km_entry(entry) if 'kmValue' in entry else parse_reaction_entry(entry)

    if 'organism' in parsed:
        return {
            'organism': parsed['organism'],
            'ec_number': parsed.get('ecNumber', ''),
            'substrate': parsed.get('substrate', ''),
            'km_value': parsed.get('kmValue', ''),
            'km_numeric': parsed.get('km_value_numeric', None),
            'ph': parsed.get('ph', None),
            'temperature': parsed.get('temperature', None),
            'commentary': parsed.get('commentary', ''),
            'literature': parsed.get('literature', '')
        }

    return {}
|
||
|
||
|
||
def search_enzymes_by_substrate(substrate: str, limit: int = 50) -> List[Dict[str, Any]]:
    """Search for enzymes that act on a specific substrate.

    Queries Km values for the substrate across all EC numbers ("*") and
    returns one record per distinct (EC number, organism) pair, in the order
    the entries were received.

    Args:
        substrate: Substrate name passed to get_km_values.
        limit: Maximum number of unique records to return.

    Returns:
        List of dicts with the keys ec_number, organism, substrate, km_value,
        km_numeric and commentary. On a query error the message is printed
        and whatever was collected so far is returned.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    unique_enzymes = []
    seen = set()

    try:
        km_data = get_km_values("*", substrate=substrate)
        time.sleep(0.5)  # Rate limiting

        # De-duplicate while scanning and stop once `limit` unique records
        # are collected. Truncating the raw entries to `limit` first would
        # return fewer results whenever duplicates are present.
        for entry in km_data:
            if len(unique_enzymes) >= limit:
                break

            parsed = parse_km_entry(entry)
            if not parsed:
                continue

            key = (parsed.get('ecNumber', ''), parsed.get('organism', ''))
            if key in seen:
                continue
            seen.add(key)

            unique_enzymes.append({
                'ec_number': key[0],
                'organism': key[1],
                'substrate': parsed.get('substrate', ''),
                'km_value': parsed.get('kmValue', ''),
                'km_numeric': parsed.get('km_value_numeric', None),
                'commentary': parsed.get('commentary', '')
            })
    except Exception as e:
        print(f"Error searching enzymes by substrate: {e}")

    return unique_enzymes
|
||
|
||
|
||
def search_enzymes_by_product(product: str, limit: int = 50) -> List[Dict[str, Any]]:
    """Search for enzymes that produce a specific product.

    Queries reactions matching ``*<product>*`` across all EC numbers and
    keeps those whose parsed product list contains ``product`` as a
    case-insensitive substring.

    Args:
        product: Product name to look for.
        limit: Maximum number of matching records to return.

    Returns:
        List of dicts with the keys ec_number, organism, reaction, reactants,
        products and commentary. On a query error the message is printed and
        whatever was collected so far is returned.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    enzymes = []
    wanted = product.lower()

    try:
        # This is a simplified approach - in practice you might need
        # more sophisticated pattern matching for products
        reactions = get_reactions("*", reaction=f"*{product}*")
        time.sleep(0.5)  # Rate limiting

        # Apply the limit to accepted matches, not to the raw entries:
        # entries where the compound only appears as a reactant are
        # filtered out and must not use up the quota.
        for entry in reactions:
            if len(enzymes) >= limit:
                break

            parsed = parse_reaction_entry(entry)
            if not parsed or 'products' not in parsed:
                continue

            if any(wanted in prod.lower() for prod in parsed['products']):
                enzymes.append({
                    'ec_number': parsed.get('ecNumber', ''),
                    'organism': parsed.get('organism', ''),
                    'reaction': parsed.get('reaction', ''),
                    'reactants': parsed.get('reactants', []),
                    'products': parsed.get('products', []),
                    'commentary': parsed.get('commentary', '')
                })
    except Exception as e:
        print(f"Error searching enzymes by product: {e}")

    return enzymes
|
||
|
||
|
||
def compare_across_organisms(ec_number: str, organisms: List[str]) -> List[Dict[str, Any]]:
    """Summarize Km, pH and temperature data of one enzyme per organism.

    One result dict is produced for every organism, in input order:

    - with data: organism, ec_number, data_points, average_km, min_km,
      max_km, optimal_ph (mean pH), optimal_temperature (mean temperature)
      and temperature_range; statistics are None when no value was parsed.
    - without data: organism, ec_number, data_points=0 and a note.
    - on failure: organism, ec_number and the error text (also printed).

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    def _mean(values):
        # Arithmetic mean, or None for an empty list.
        return sum(values) / len(values) if values else None

    results = []

    for organism in organisms:
        try:
            km_data = get_km_values(ec_number, organism=organism)
            time.sleep(0.5)  # Rate limiting

            if not km_data:
                results.append({
                    'organism': organism,
                    'ec_number': ec_number,
                    'data_points': 0,
                    'note': 'No data found'
                })
                continue

            records = [parse_km_entry(raw) for raw in km_data]
            km_numbers = [rec['km_value_numeric'] for rec in records if 'km_value_numeric' in rec]
            ph_values = [rec['ph'] for rec in records if 'ph' in rec]
            temp_values = [rec['temperature'] for rec in records if 'temperature' in rec]

            results.append({
                'organism': organism,
                'ec_number': ec_number,
                'data_points': len(km_data),
                'average_km': _mean(km_numbers),
                'min_km': min(km_numbers) if km_numbers else None,
                'max_km': max(km_numbers) if km_numbers else None,
                'optimal_ph': _mean(ph_values),
                'optimal_temperature': _mean(temp_values),
                'temperature_range': (min(temp_values), max(temp_values)) if temp_values else None
            })
        except Exception as e:
            print(f"Error comparing organism {organism}: {e}")
            results.append({
                'organism': organism,
                'ec_number': ec_number,
                'error': str(e)
            })

    return results
|
||
|
||
|
||
def get_organisms_for_enzyme(ec_number: str) -> List[str]:
    """Return the sorted, distinct organism names found in an enzyme's Km data.

    Returns an empty list (after printing the error) when the query or the
    parsing fails.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        records = (parse_km_entry(raw) for raw in km_data)
        names = {rec['organism'] for rec in records if 'organism' in rec}
        return sorted(names)
    except Exception as e:
        print(f"Error getting organisms for enzyme {ec_number}: {e}")
        return []
|
||
|
||
|
||
def get_environmental_parameters(ec_number: str) -> Dict[str, Any]:
    """Get environmental parameters (pH, temperature) for an enzyme.

    Aggregates the pH and temperature values parsed from every Km entry and
    scans the commentary for stability ranges ("stable ... pH 6-8",
    "stable ... 30-50 °C").

    Args:
        ec_number: EC number passed to get_km_values.

    Returns:
        Dict with the keys ec_number, data_points, ph_range, optimal_ph
        (mean pH), optimal_temperature (mean temperature),
        temperature_range, stability_ph and temperature_stability (first
        range found, else None). On failure the error is printed and a dict
        with ec_number and error is returned instead.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    # Use a plain decimal instead of the character class [\d.]+ so that a
    # sentence-ending period ("ph 6.0-8.0.") is not captured; float() would
    # raise and the whole query would collapse into the error result.
    decimal = r'(\d+(?:\.\d+)?)'
    ph_range_pattern = re.compile(r'ph\s*' + decimal + r'\s*[-–]\s*' + decimal)
    temp_range_pattern = re.compile(r'(\d+)\s*[-–]\s*(\d+)\s*°?c')

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        phs = []
        temperatures = []
        ph_stabilities = []
        temp_stabilities = []

        for entry in km_data:
            parsed = parse_km_entry(entry)

            if 'ph' in parsed:
                phs.append(parsed['ph'])
            if 'temperature' in parsed:
                temperatures.append(parsed['temperature'])

            # Stability hints are matched on the lower-cased commentary,
            # hence the lower-case "ph" and "c" in the patterns above.
            commentary = parsed.get('commentary', '').lower()
            if 'stable' not in commentary:
                continue

            if 'ph' in commentary:
                ph_range_match = ph_range_pattern.search(commentary)
                if ph_range_match:
                    ph_stabilities.append(
                        (float(ph_range_match.group(1)), float(ph_range_match.group(2)))
                    )

            if 'temp' in commentary or '°c' in commentary:
                temp_match = temp_range_pattern.search(commentary)
                if temp_match:
                    temp_stabilities.append(
                        (int(temp_match.group(1)), int(temp_match.group(2)))
                    )

        return {
            'ec_number': ec_number,
            'data_points': len(km_data),
            'ph_range': (min(phs), max(phs)) if phs else None,
            'optimal_ph': sum(phs) / len(phs) if phs else None,
            'optimal_temperature': sum(temperatures) / len(temperatures) if temperatures else None,
            'temperature_range': (min(temperatures), max(temperatures)) if temperatures else None,
            'stability_ph': ph_stabilities[0] if ph_stabilities else None,
            'temperature_stability': temp_stabilities[0] if temp_stabilities else None
        }

    except Exception as e:
        print(f"Error getting environmental parameters for {ec_number}: {e}")
        return {'ec_number': ec_number, 'error': str(e)}
|
||
|
||
|
||
def get_cofactor_requirements(ec_number: str) -> List[Dict[str, Any]]:
    """List well-known cofactors that appear among an enzyme's reactants.

    Each reactant of every reaction entry is compared, case-insensitively
    and by substring, against a fixed list of common cofactor names. The
    first hit per (cofactor name, organism) pair is kept.

    Returns:
        List of dicts with the keys name, full_name (the matching reactant),
        type, organism and ec_number. Errors are printed and the matches
        collected up to that point are returned.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    known_cofactors = (
        'NAD+', 'NADH', 'NADP+', 'NADPH',
        'ATP', 'ADP', 'AMP',
        'FAD', 'FADH2',
        'CoA', 'acetyl-CoA',
        'pyridoxal phosphate', 'PLP',
        'biotin',
        'heme', 'iron-sulfur',
    )

    matches = []
    already_reported = set()

    try:
        reactions = get_reactions(ec_number)
        time.sleep(0.5)  # Rate limiting

        for raw in reactions:
            record = parse_reaction_entry(raw)
            if not record or 'reactants' not in record:
                continue

            for reactant in record['reactants']:
                reactant_lower = reactant.lower()
                for candidate in known_cofactors:
                    if candidate.lower() not in reactant_lower:
                        continue

                    organism = record.get('organism', '')
                    pair = (candidate, organism)
                    if pair in already_reported:
                        continue
                    already_reported.add(pair)

                    matches.append({
                        'name': candidate,
                        'full_name': reactant,
                        'type': 'oxidoreductase' if 'NAD' in candidate else 'other',
                        'organism': organism,
                        'ec_number': ec_number
                    })
    except Exception as e:
        print(f"Error getting cofactor requirements for {ec_number}: {e}")

    return matches
|
||
|
||
|
||
def get_substrate_specificity(ec_number: str) -> List[Dict[str, Any]]:
    """Get substrate specificity data for an enzyme.

    Groups the numeric Km values of all entries by substrate and returns one
    summary per substrate, sorted by ascending mean Km.

    Args:
        ec_number: EC number passed to get_km_values.

    Returns:
        List of dicts with the keys name, km (mean), min_km, max_km,
        data_points, organisms (sorted names), vmax, kcat and kcat_km_ratio.
        The last three are always None because this query does not collect
        them. Errors are printed and whatever was built so far is returned.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    specificity = []

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        substrate_data = {}
        for entry in km_data:
            parsed = parse_km_entry(entry)
            if 'substrate' not in parsed or 'km_value_numeric' not in parsed:
                continue

            bucket = substrate_data.setdefault(
                parsed['substrate'], {'km_values': [], 'organisms': set()}
            )
            bucket['km_values'].append(parsed['km_value_numeric'])
            if 'organism' in parsed:
                bucket['organisms'].add(parsed['organism'])

        # Every bucket holds at least one Km value, so the mean is defined.
        for substrate, data in substrate_data.items():
            km_values = data['km_values']
            specificity.append({
                'name': substrate,
                'km': sum(km_values) / len(km_values),
                'min_km': min(km_values),
                'max_km': max(km_values),
                'data_points': len(km_values),
                # Sorted so the output is deterministic across runs.
                'organisms': sorted(data['organisms']),
                'vmax': None,
                'kcat': None,
                'kcat_km_ratio': None  # Would need kcat data to calculate
            })

        # Sort by Km (a lower value is conventionally read as higher
        # affinity). Compare the number itself: a truthiness test would send
        # a mean of 0.0 to the end of the list.
        specificity.sort(key=lambda item: item['km'])

    except Exception as e:
        print(f"Error getting substrate specificity for {ec_number}: {e}")

    return specificity
|
||
|
||
|
||
def compare_substrate_affinity(ec_number: str) -> List[Dict[str, Any]]:
    """Return per-substrate Km summaries for an enzyme.

    Thin alias: delegates unchanged to get_substrate_specificity.
    """
    affinity_table = get_substrate_specificity(ec_number)
    return affinity_table
|
||
|
||
|
||
def get_inhibitors(ec_number: str) -> List[Dict[str, Any]]:
    """Collect inhibitors mentioned in the commentary of an enzyme's Km data.

    This is an approximate text search: a commentary is considered only if
    it contains an inhibition keyword, and then every name from a fixed list
    of common inhibitors found in it is reported. The first hit per
    (inhibitor name, organism) pair is kept.

    Returns:
        List of dicts with the keys name, type, organism, ec_number and
        commentary. Errors are printed and the hits collected up to that
        point are returned.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    trigger_words = ('inhibited', 'inhibition', 'blocked', 'prevented', 'reduced')
    known_inhibitors = (
        'iodoacetate', 'n-ethylmaleimide', 'p-chloromercuribenzoate',
        'heavy metals', 'mercury', 'copper', 'zinc',
        'cyanide', 'azide', 'carbon monoxide',
        'edta', 'egta',
    )

    hits = []
    already_reported = set()

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        for raw in km_data:
            record = parse_km_entry(raw)
            text = record.get('commentary', '').lower()

            if not any(word in text for word in trigger_words):
                continue

            for name in known_inhibitors:
                if name not in text:
                    continue

                organism = record.get('organism', '')
                pair = (name, organism)
                if pair in already_reported:
                    continue
                already_reported.add(pair)

                if 'iodoacetate' in name or 'maleimide' in name:
                    kind = 'irreversible'
                else:
                    kind = 'reversible'

                hits.append({
                    'name': name,
                    'type': kind,
                    'organism': organism,
                    'ec_number': ec_number,
                    'commentary': record.get('commentary', '')
                })
    except Exception as e:
        print(f"Error getting inhibitors for {ec_number}: {e}")

    return hits
|
||
|
||
|
||
def get_activators(ec_number: str) -> List[Dict[str, Any]]:
    """Get activator information for an enzyme (from commentary).

    This is an approximate text search: a commentary is considered only if
    it contains an activation keyword, and then every name from a fixed list
    of common activators found in it is reported. The first hit per
    (activator name, organism) pair is kept.

    Args:
        ec_number: EC number passed to get_km_values.

    Returns:
        List of dicts with the keys name, type ('metal ion',
        'reducing agent' or 'other'), mechanism ('allosteric', 'cofactor' or
        'unknown', taken from words in the commentary), organism, ec_number
        and commentary. Errors are printed and the hits collected up to that
        point are returned.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    activator_keywords = ('activated', 'stimulated', 'enhanced', 'increased')
    common_activators = (
        'mg2+', 'mn2+', 'ca2+', 'zn2+',
        'k+', 'na+',
        'phosphate', 'pyrophosphate',
        'dithiothreitol', 'dtt',
        'β-mercaptoethanol',
    )
    # 'dithiothreitol' is the spelled-out form of 'dtt', so both are
    # classified as reducing agents.
    reducing_markers = ('dtt', 'dithiothreitol', 'mercapto')

    activators = []
    seen = set()

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        for entry in km_data:
            parsed = parse_km_entry(entry)
            commentary = parsed.get('commentary', '').lower()

            if not any(keyword in commentary for keyword in activator_keywords):
                continue

            # The original conditional here had two `else` branches and no
            # second condition, which is a SyntaxError. It is read as a
            # three-way choice on words found in the commentary.
            if 'allosteric' in commentary:
                mechanism = 'allosteric'
            elif 'cofactor' in commentary:
                mechanism = 'cofactor'
            else:
                mechanism = 'unknown'

            for activator in common_activators:
                if activator not in commentary:
                    continue

                organism = parsed.get('organism', '')
                key = (activator, organism)
                if key in seen:
                    continue
                seen.add(key)

                if '+' in activator:
                    activator_type = 'metal ion'
                elif any(marker in activator for marker in reducing_markers):
                    activator_type = 'reducing agent'
                else:
                    activator_type = 'other'

                activators.append({
                    'name': activator,
                    'type': activator_type,
                    'mechanism': mechanism,
                    'organism': organism,
                    'ec_number': ec_number,
                    'commentary': parsed.get('commentary', '')
                })

    except Exception as e:
        print(f"Error getting activators for {ec_number}: {e}")

    return activators
|
||
|
||
|
||
def find_thermophilic_homologs(ec_number: str, min_temp: int = 50) -> List[Dict[str, Any]]:
    """Find organisms with likely thermophilic versions of an enzyme.

    Only organisms whose name contains 'therm', 'hypertherm' or 'pyro' are
    queried. An organism is reported when the highest temperature parsed
    from its Km entries is at least ``min_temp``.

    Returns:
        List of dicts with the keys organism, ec_number, optimal_temperature
        (highest parsed temperature), temperature_range, km (mean, or None)
        and data_points. Errors are printed and the hits collected up to
        that point are returned.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    name_hints = ('therm', 'hypertherm', 'pyro')
    hits = []

    try:
        for organism in get_organisms_for_enzyme(ec_number):
            lowered_name = organism.lower()
            if not any(hint in lowered_name for hint in name_hints):
                continue

            km_data = get_km_values(ec_number, organism=organism)
            time.sleep(0.2)  # Rate limiting

            records = [parse_km_entry(raw) for raw in km_data]
            temps = [rec['temperature'] for rec in records if 'temperature' in rec]
            km_numbers = [rec['km_value_numeric'] for rec in records if 'km_value_numeric' in rec]

            if temps and max(temps) >= min_temp:
                hits.append({
                    'organism': organism,
                    'ec_number': ec_number,
                    'optimal_temperature': max(temps),
                    'temperature_range': (min(temps), max(temps)),
                    'km': sum(km_numbers) / len(km_numbers) if km_numbers else None,
                    'data_points': len(km_data)
                })
    except Exception as e:
        print(f"Error finding thermophilic homologs for {ec_number}: {e}")

    return hits
|
||
|
||
|
||
def find_ph_stable_variants(ec_number: str, min_ph: float = 8.0, max_ph: float = 6.0) -> List[Dict[str, Any]]:
    """Find organisms whose enzyme data lies entirely in an alkaline or acidic range.

    For every organism with Km data, the pH values parsed from its entries
    are collected. The organism is reported as 'alkaline' when its lowest pH
    is at least ``min_ph``, otherwise as 'acidic' when its highest pH is at
    most ``max_ph``. A falsy threshold (None or 0) disables that test.

    Returns:
        List of dicts with the keys organism, ec_number, ph_range,
        optimal_ph (mean pH), km (mean, or None), stability_type and
        data_points. Errors are printed and the hits collected up to that
        point are returned.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    hits = []

    try:
        for organism in get_organisms_for_enzyme(ec_number):
            km_data = get_km_values(ec_number, organism=organism)
            time.sleep(0.2)  # Rate limiting

            records = [parse_km_entry(raw) for raw in km_data]
            ph_values = [rec['ph'] for rec in records if 'ph' in rec]
            km_numbers = [rec['km_value_numeric'] for rec in records if 'km_value_numeric' in rec]

            if not ph_values:
                continue

            lowest, highest = min(ph_values), max(ph_values)
            alkaline = min_ph and lowest >= min_ph
            acidic = max_ph and highest <= max_ph

            if alkaline or acidic:
                hits.append({
                    'organism': organism,
                    'ec_number': ec_number,
                    'ph_range': (lowest, highest),
                    'optimal_ph': sum(ph_values) / len(ph_values),
                    'km': sum(km_numbers) / len(km_numbers) if km_numbers else None,
                    'stability_type': 'alkaline' if alkaline else 'acidic',
                    'data_points': len(km_data)
                })
    except Exception as e:
        print(f"Error finding pH-stable variants for {ec_number}: {e}")

    return hits
|
||
|
||
|
||
def get_modeling_parameters(ec_number: str, substrate: Optional[str] = None) -> Dict[str, Any]:
    """Get parameters suitable for kinetic modeling.

    Aggregates the Km, pH and temperature values parsed from the enzyme's Km
    entries, plus any "vmax = N" / "kcat = N" figures found in the
    commentary.

    Args:
        ec_number: EC number passed to get_km_values.
        substrate: Optional substrate filter; all substrates when omitted.

    Returns:
        Dict with the keys ec_number, substrate ('various' when no filter
        was given), km (mean), km_std (population standard deviation), vmax,
        kcat, optimal_ph, optimal_temperature, data_points, temperature
        (mean, default 25.0), ph (mean, default 7.0), enzyme_conc (1.0) and
        substrate_conc (None). When no data is found or the query fails, a
        dict with ec_number and error is returned instead.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    # Use a plain decimal rather than the character class [\d.]+ so a
    # sentence-ending period ("kcat = 3.5.") is not captured; float() would
    # raise and the whole query would turn into the error result.
    decimal = r'(\d+(?:\.\d+)?)'
    vmax_pattern = re.compile(r'vmax\s*=\s*' + decimal)
    kcat_pattern = re.compile(r'kcat\s*=\s*' + decimal)

    def _mean(values):
        # Arithmetic mean, or None for an empty list.
        return sum(values) / len(values) if values else None

    try:
        if substrate:
            km_data = get_km_values(ec_number, substrate=substrate)
        else:
            km_data = get_km_values(ec_number)

        time.sleep(0.5)  # Rate limiting

        if not km_data:
            return {'ec_number': ec_number, 'error': 'No kinetic data found'}

        kms = []
        phs = []
        temperatures = []
        v_max_values = []
        kcat_values = []

        for entry in km_data:
            parsed = parse_km_entry(entry)

            if 'km_value_numeric' in parsed:
                kms.append(parsed['km_value_numeric'])
            if 'ph' in parsed:
                phs.append(parsed['ph'])
            if 'temperature' in parsed:
                temperatures.append(parsed['temperature'])

            # Look for Vmax and kcat in commentary (rare in BRENDA)
            commentary = parsed.get('commentary', '').lower()
            vmax_match = vmax_pattern.search(commentary)
            if vmax_match:
                v_max_values.append(float(vmax_match.group(1)))

            kcat_match = kcat_pattern.search(commentary)
            if kcat_match:
                kcat_values.append(float(kcat_match.group(1)))

        # Compute each mean once; recomputing it for every element of the
        # variance sum made the standard deviation quadratic in the number
        # of entries.
        mean_km = _mean(kms)
        mean_ph = _mean(phs)
        mean_temperature = _mean(temperatures)

        if kms:
            km_std = (sum((x - mean_km) ** 2 for x in kms) / len(kms)) ** 0.5
        else:
            km_std = None

        return {
            'ec_number': ec_number,
            'substrate': substrate if substrate else 'various',
            'km': mean_km,
            'km_std': km_std,
            'vmax': _mean(v_max_values),
            'kcat': _mean(kcat_values),
            'optimal_ph': mean_ph,
            'optimal_temperature': mean_temperature,
            'data_points': len(km_data),
            'temperature': mean_temperature if temperatures else 25.0,  # Default to 25°C
            'ph': mean_ph if phs else 7.0,  # Default to pH 7.0
            'enzyme_conc': 1.0,  # Default enzyme concentration (μM)
            'substrate_conc': None,  # Would be set by user
        }

    except Exception as e:
        return {'ec_number': ec_number, 'error': str(e)}
|
||
|
||
|
||
def export_kinetic_data(ec_number: str, format: str = 'csv', filename: Optional[str] = None) -> str:
    """Export kinetic data to file.

    Fetches the enzyme's Km entries, parses them and writes the non-empty
    records in the requested format.

    Args:
        ec_number: EC number passed to get_km_values.
        format: 'csv', 'json' or 'excel' (case-insensitive).
        filename: Target path. Defaults to
            ``brenda_kinetic_data_<ec with '.' as '_'>.<ext>``, where the
            extension is the format name, except that 'excel' maps to
            'xlsx'.

    Returns:
        The target filename, whether or not anything was written. This is
        best-effort: problems are reported with print() instead of raised.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    export_format = str(format).lower()

    if not filename:
        # pandas picks the Excel writer from the file extension, and
        # ".excel" is not an extension it recognizes.
        extension = 'xlsx' if export_format == 'excel' else format
        filename = f"brenda_kinetic_data_{ec_number.replace('.', '_')}.{extension}"

    try:
        km_data = get_km_values(ec_number)
        time.sleep(0.5)  # Rate limiting

        if not km_data:
            print(f"No kinetic data found for EC {ec_number}")
            return filename

        parsed_data = [
            record for record in (parse_km_entry(entry) for entry in km_data) if record
        ]

        if export_format == 'csv':
            if not parsed_data:
                with open(filename, 'w', newline='', encoding='utf-8') as f:
                    f.write('No data found')
            elif PANDAS_AVAILABLE:
                pd.DataFrame(parsed_data).to_csv(filename, index=False)
            else:
                # pandas is optional: fall back to the csv module, using the
                # union of all keys in first-seen order as the header.
                fieldnames = list(dict.fromkeys(
                    key for record in parsed_data for key in record
                ))
                with open(filename, 'w', newline='', encoding='utf-8') as f:
                    writer = csv.DictWriter(f, fieldnames=fieldnames, restval='')
                    writer.writeheader()
                    writer.writerows(parsed_data)

        elif export_format == 'json':
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(parsed_data, f, indent=2, default=str)

        elif export_format == 'excel':
            if not PANDAS_AVAILABLE:
                print("pandas required for Excel export")
                return filename
            if not parsed_data:
                print(f"No parsable kinetic data for EC {ec_number}; nothing exported")
                return filename
            pd.DataFrame(parsed_data).to_excel(filename, index=False)

        else:
            # Do not claim success for a format that wrote nothing.
            print(f"Unsupported export format: {format}")
            return filename

        print(f"Exported {len(parsed_data)} entries to {filename}")
        return filename

    except Exception as e:
        print(f"Error exporting data: {e}")
        return filename
|
||
|
||
|
||
def search_by_pattern(pattern: str, limit: int = 50) -> List[Dict[str, Any]]:
    """Find enzymes whose reaction text matches a keyword or pattern.

    Queries reactions matching ``*<pattern>*`` across all EC numbers and
    converts at most the first ``limit`` returned entries into records.

    Returns:
        List of dicts with the keys ec_number, organism, reaction, reactants,
        products and commentary. Errors are printed and the records
        collected up to that point are returned.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    matches = []

    try:
        reactions = get_reactions("*", reaction=f"*{pattern}*")
        time.sleep(0.5)  # Rate limiting

        for raw in reactions[:limit]:
            record = parse_reaction_entry(raw)
            if not record:
                continue

            matches.append({
                'ec_number': record.get('ecNumber', ''),
                'organism': record.get('organism', ''),
                'reaction': record.get('reaction', ''),
                'reactants': record.get('reactants', []),
                'products': record.get('products', []),
                'commentary': record.get('commentary', '')
            })
    except Exception as e:
        print(f"Error searching by pattern '{pattern}': {e}")

    return matches
|
||
|
||
|
||
if __name__ == "__main__":
    # Demonstration run: three sample queries printed to stdout. Any failure
    # is reported once and ends the demo.
    print("BRENDA Database Query Examples")
    print("=" * 40)

    try:
        # Example 1: enzymes acting on a substrate
        print("\n1. Searching enzymes for 'glucose':")
        for hit in search_enzymes_by_substrate("glucose", limit=5):
            print(f" EC {hit['ec_number']}: {hit['organism']}")
            print(f" Km: {hit['km_value']}")

        # Example 2: one enzyme compared across several organisms
        print("\n2. Comparing alcohol dehydrogenase (1.1.1.1) across organisms:")
        sample_organisms = ["Escherichia coli", "Saccharomyces cerevisiae", "Homo sapiens"]
        for summary in compare_across_organisms("1.1.1.1", sample_organisms):
            if summary.get('data_points', 0) > 0:
                print(f" {summary['organism']}:")
                print(f" Avg Km: {summary.get('average_km', 'N/A')}")
                print(f" Optimal pH: {summary.get('optimal_ph', 'N/A')}")

        # Example 3: pH / temperature overview for one enzyme
        print("\n3. Environmental parameters for 1.1.1.1:")
        environment = get_environmental_parameters("1.1.1.1")
        if environment.get('data_points', 0) > 0:
            print(f" pH range: {environment.get('ph_range', 'N/A')}")
            print(f" Temperature range: {environment.get('temperature_range', 'N/A')}")

    except Exception as e:
        print(f"Example failed: {e}")