Files
claude-scientific-skills/scientific-skills/brenda-database/scripts/enzyme_pathway_builder.py
Jinxiang Xie 280a53f95e Add BRENDA database skill for enzyme research and analysis
- Add comprehensive BRENDA database skill with API integration
      - Include enzyme data retrieval, pathway analysis, and visualization
      - Support for enzyme queries, kinetic parameters, and taxonomy data
      - Add visualization scripts for enzyme pathways and kinetics
2025-12-03 12:36:49 +08:00

1053 lines
44 KiB
Python

"""
Enzyme Pathway Builder for Retrosynthetic Analysis
This module provides tools for constructing enzymatic pathways and
retrosynthetic trees using BRENDA database information.
Key features:
- Find enzymatic pathways for target products
- Build retrosynthetic trees from products
- Suggest enzyme substitutions and alternatives
- Calculate pathway feasibility and thermodynamics
- Optimize pathway conditions (pH, temperature, cofactors)
- Generate detailed pathway reports
- Support for metabolic engineering and synthetic biology
Installation:
uv pip install networkx matplotlib pandas
Usage:
from scripts.enzyme_pathway_builder import find_pathway_for_product, build_retrosynthetic_tree
pathway = find_pathway_for_product("lactate", max_steps=3)
tree = build_retrosynthetic_tree("lactate", depth=2)
"""
import re
import json
import time
from typing import List, Dict, Any, Optional, Set, Tuple
from pathlib import Path
try:
import networkx as nx
NETWORKX_AVAILABLE = True
except ImportError:
print("Warning: networkx not installed. Install with: uv pip install networkx")
NETWORKX_AVAILABLE = False
try:
import pandas as pd
PANDAS_AVAILABLE = True
except ImportError:
print("Warning: pandas not installed. Install with: uv pip install pandas")
PANDAS_AVAILABLE = False
try:
import matplotlib.pyplot as plt
MATPLOTLIB_AVAILABLE = True
except ImportError:
print("Warning: matplotlib not installed. Install with: uv pip install matplotlib")
MATPLOTLIB_AVAILABLE = False
try:
from brenda_queries import (
search_enzymes_by_product, search_enzymes_by_substrate,
get_environmental_parameters, compare_across_organisms,
get_substrate_specificity, get_cofactor_requirements,
find_thermophilic_homologs, find_ph_stable_variants
)
BRENDA_QUERIES_AVAILABLE = True
except ImportError:
print("Warning: brenda_queries not available")
BRENDA_QUERIES_AVAILABLE = False
def validate_dependencies():
    """Raise ImportError if any required dependency failed to import.

    Consults the module-level availability flags for networkx, pandas and
    brenda_queries. matplotlib is not checked here.

    Raises:
        ImportError: Names every required dependency that is unavailable.
    """
    availability = (
        ("networkx", NETWORKX_AVAILABLE),
        ("pandas", PANDAS_AVAILABLE),
        ("brenda_queries", BRENDA_QUERIES_AVAILABLE),
    )
    missing = [name for name, available in availability if not available]
    if missing:
        raise ImportError(f"Missing required dependencies: {', '.join(missing)}")
# Transformation type -> EC number prefixes of enzyme groups that typically
# catalyse it.
COMMON_TRANSFORMATIONS = {
    # Redox chemistry
    'oxidation': ['1.1.1'],                   # Alcohol dehydrogenases
    'reduction': ['1.1.1'],                   # Alcohol dehydrogenases
    # Bond cleavage / formation
    'hydrolysis': ['3.1.1', '3.1.3'],         # Esterases, phosphatases
    'carboxylation': ['6.4.1'],               # Carboxylases
    'decarboxylation': ['4.1.1'],             # Decarboxylases
    # Group transfer
    'transamination': ['2.6.1'],              # Aminotransferases
    'phosphorylation': ['2.7.1'],             # Kinases
    'dephosphorylation': ['3.1.3'],           # Phosphatases
    # Rearrangement and joining
    'isomerization': ['5.1.1', '5.3.1'],      # Isomerases
    'ligation': ['6.3.1'],                    # Ligases
    # Broader categories
    'transfer': ['2.1.1', '2.2.1', '2.4.1'],  # Transferases
    'hydride_transfer': ['1.1.1', '1.2.1'],   # Oxidoreductases
    'group_transfer': ['2.1.1'],              # Methyltransferases
}
# Simple metabolite database (expanded for pathway building).
# Keys are lower-case metabolite names; each entry holds the molecular
# formula, the molecular weight ('mw', g/mol) and a coarse chemical class.
METABOLITE_DATABASE = {
    # Primary metabolites
    'glucose': {'formula': 'C6H12O6', 'mw': 180.16, 'class': 'sugar'},
    'fructose': {'formula': 'C6H12O6', 'mw': 180.16, 'class': 'sugar'},
    'galactose': {'formula': 'C6H12O6', 'mw': 180.16, 'class': 'sugar'},
    # C3H4O3 weighs 88.06; the previous 90.08 was the value for lactate.
    'pyruvate': {'formula': 'C3H4O3', 'mw': 88.06, 'class': 'carboxylic_acid'},
    'lactate': {'formula': 'C3H6O3', 'mw': 90.08, 'class': 'carboxylic_acid'},
    'acetate': {'formula': 'C2H4O2', 'mw': 60.05, 'class': 'carboxylic_acid'},
    'ethanol': {'formula': 'C2H6O', 'mw': 46.07, 'class': 'alcohol'},
    'acetaldehyde': {'formula': 'C2H4O', 'mw': 44.05, 'class': 'aldehyde'},
    'acetone': {'formula': 'C3H6O', 'mw': 58.08, 'class': 'ketone'},
    'glycerol': {'formula': 'C3H8O3', 'mw': 92.09, 'class': 'alcohol'},
    'ammonia': {'formula': 'NH3', 'mw': 17.03, 'class': 'inorganic'},
    'carbon dioxide': {'formula': 'CO2', 'mw': 44.01, 'class': 'inorganic'},
    'water': {'formula': 'H2O', 'mw': 18.02, 'class': 'inorganic'},
    'oxygen': {'formula': 'O2', 'mw': 32.00, 'class': 'inorganic'},
    'hydrogen': {'formula': 'H2', 'mw': 2.02, 'class': 'inorganic'},
    'nitrogen': {'formula': 'N2', 'mw': 28.01, 'class': 'inorganic'},
    'phosphate': {'formula': 'PO4', 'mw': 94.97, 'class': 'inorganic'},
    'sulfate': {'formula': 'SO4', 'mw': 96.06, 'class': 'inorganic'},
    # Amino acids
    'alanine': {'formula': 'C3H7NO2', 'mw': 89.09, 'class': 'amino_acid'},
    'glycine': {'formula': 'C2H5NO2', 'mw': 75.07, 'class': 'amino_acid'},
    'serine': {'formula': 'C3H7NO3', 'mw': 105.09, 'class': 'amino_acid'},
    'threonine': {'formula': 'C4H9NO3', 'mw': 119.12, 'class': 'amino_acid'},
    'aspartate': {'formula': 'C4H7NO4', 'mw': 133.10, 'class': 'amino_acid'},
    'glutamate': {'formula': 'C5H9NO4', 'mw': 147.13, 'class': 'amino_acid'},
    'asparagine': {'formula': 'C4H8N2O3', 'mw': 132.12, 'class': 'amino_acid'},
    'glutamine': {'formula': 'C5H10N2O3', 'mw': 146.15, 'class': 'amino_acid'},
    'lysine': {'formula': 'C6H14N2O2', 'mw': 146.19, 'class': 'amino_acid'},
    'arginine': {'formula': 'C6H14N4O2', 'mw': 174.20, 'class': 'amino_acid'},
    'histidine': {'formula': 'C6H9N3O2', 'mw': 155.16, 'class': 'amino_acid'},
    'phenylalanine': {'formula': 'C9H11NO2', 'mw': 165.19, 'class': 'amino_acid'},
    'tyrosine': {'formula': 'C9H11NO3', 'mw': 181.19, 'class': 'amino_acid'},
    'tryptophan': {'formula': 'C11H12N2O2', 'mw': 204.23, 'class': 'amino_acid'},
    'leucine': {'formula': 'C6H13NO2', 'mw': 131.18, 'class': 'amino_acid'},
    'isoleucine': {'formula': 'C6H13NO2', 'mw': 131.18, 'class': 'amino_acid'},
    'valine': {'formula': 'C5H11NO2', 'mw': 117.15, 'class': 'amino_acid'},
    'methionine': {'formula': 'C5H11NO2S', 'mw': 149.21, 'class': 'amino_acid'},
    'cysteine': {'formula': 'C3H7NO2S', 'mw': 121.16, 'class': 'amino_acid'},
    'proline': {'formula': 'C5H9NO2', 'mw': 115.13, 'class': 'amino_acid'},
    # Nucleotides (simplified)
    'atp': {'formula': 'C10H16N5O13P3', 'mw': 507.18, 'class': 'nucleotide'},
    'adp': {'formula': 'C10H15N5O10P2', 'mw': 427.20, 'class': 'nucleotide'},
    'amp': {'formula': 'C10H14N5O7P', 'mw': 347.22, 'class': 'nucleotide'},
    'nad': {'formula': 'C21H27N7O14P2', 'mw': 663.43, 'class': 'cofactor'},
    'nadh': {'formula': 'C21H29N7O14P2', 'mw': 665.44, 'class': 'cofactor'},
    'nadp': {'formula': 'C21H28N7O17P3', 'mw': 743.44, 'class': 'cofactor'},
    'nadph': {'formula': 'C21H30N7O17P3', 'mw': 745.45, 'class': 'cofactor'},
    # Flavin cofactors. FADH2 previously carried a wrong formula together
    # with the weight of oxidised FAD.
    'fadh2': {'formula': 'C27H35N9O15P2', 'mw': 787.57, 'class': 'cofactor'},
    # 'fadx' is kept for backward compatibility; it is treated as oxidised
    # FAD (it is the 'oxidation' partner of 'fadh2' in COFACTOR_ROLES).
    # Its earlier formula and weight did not agree with each other.
    'fadx': {'formula': 'C27H33N9O15P2', 'mw': 785.55, 'class': 'cofactor'},
    'fad': {'formula': 'C27H33N9O15P2', 'mw': 785.55, 'class': 'cofactor'},
    # Common organic acids
    'malate': {'formula': 'C4H6O5', 'mw': 134.09, 'class': 'carboxylic_acid'},
    'oxaloacetate': {'formula': 'C4H4O5', 'mw': 132.07, 'class': 'carboxylic_acid'},
    'succinate': {'formula': 'C4H6O4', 'mw': 118.09, 'class': 'carboxylic_acid'},
    'fumarate': {'formula': 'C4H4O4', 'mw': 116.07, 'class': 'carboxylic_acid'},
    'oxalosuccinate': {'formula': 'C6H6O7', 'mw': 190.12, 'class': 'carboxylic_acid'},
    'alpha-ketoglutarate': {'formula': 'C5H6O5', 'mw': 146.11, 'class': 'carboxylic_acid'},
    # Energy carriers
    'acetyl-coa': {'formula': 'C23H38N7O17P3S', 'mw': 809.51, 'class': 'cofactor'},
    'coenzyme-a': {'formula': 'C21H36N7O16P3S', 'mw': 767.54, 'class': 'cofactor'},
}
# Common cofactors and their roles, built from (name, role, oxidation_state)
# rows so the table reads one cofactor per line.
COFACTOR_ROLES = {
    cofactor: {'role': role, 'oxidation_state': oxidation_state}
    for cofactor, role, oxidation_state in (
        ('nad+', 'oxidation', '+1'),
        ('nadh', 'reduction', '0'),
        ('nadp+', 'oxidation', '+1'),
        ('nadph', 'reduction', '0'),
        ('fadx', 'oxidation', '0'),
        ('fadh2', 'reduction', '-2'),
        ('atp', 'phosphorylation', '0'),
        ('adp', 'energy', '0'),
        ('amp', 'energy', '0'),
        ('acetyl-coa', 'acetylation', '0'),
        ('coenzyme-a', 'thiolation', '0'),
    )
}
# Formula-like token: C<n>H<n> followed by one or more N/O/P/S symbols with
# optional counts (e.g. "c6h12o6", "c3h7no2", "c6h13o9p"). The look-arounds
# stop the token from being cut out of a longer alphanumeric word, which
# would silently drop trailing elements.
_FORMULA_TOKEN = re.compile(r'(?<![a-z0-9])c\d+h\d+(?:[nops]\d*)+(?![a-z0-9])')

# Approximate atomic weights used for the molecular-weight estimate.
_APPROX_ATOMIC_WEIGHTS = {
    'C': 12.01,
    'H': 1.008,
    'N': 14.01,
    'O': 16.00,
    'P': 30.97,
    'S': 32.07,
}


def identify_metabolite(metabolite_name: str) -> Dict[str, Any]:
    """Identify a metabolite from the database or build a best-effort entry.

    The name is lower-cased and stripped, then resolved in this order:

    1. Exact key in METABOLITE_DATABASE.
    2. A molecular-formula token inside the name (C and H with explicit
       counts, followed by any of N, O, P, S). The formula is upper-cased
       and its molecular weight estimated from approximate atomic weights.
    3. Fallback entry with formula 'Unknown' and mw 0.

    Args:
        metabolite_name: Metabolite name or formula-like string.

    Returns:
        Dict with keys 'name', 'formula', 'mw' and 'class'. Entries not
        found in the database always have class 'unknown'; estimated
        weights are rounded to two decimals like the database values.
    """
    metabolite_name = metabolite_name.lower().strip()

    # Known metabolite: copy the database record and attach the name.
    if metabolite_name in METABOLITE_DATABASE:
        return {'name': metabolite_name, **METABOLITE_DATABASE[metabolite_name]}

    match = _FORMULA_TOKEN.search(metabolite_name)
    if match:
        formula = match.group(0).upper()
        # A missing count means a single atom of that element.
        mw = sum(
            _APPROX_ATOMIC_WEIGHTS[element] * (int(count) if count else 1)
            for element, count in re.findall(r'([A-Z])(\d*)', formula)
        )
        return {
            'name': metabolite_name,
            'formula': formula,
            'mw': round(mw, 2),
            'class': 'unknown',
        }

    # Fallback - unknown metabolite
    return {
        'name': metabolite_name,
        'formula': 'Unknown',
        'mw': 0,
        'class': 'unknown',
    }
def infer_transformation_type(substrate: str, product: str) -> List[str]:
    """Infer likely transformation types from substrate and product names.

    This is a name/class heuristic, not a mechanistic analysis. Chemical
    classes come from identify_metabolite(); the phosphate, CO2 and ester
    checks are case-insensitive substring tests on the given names.

    Args:
        substrate: Name of the consumed metabolite (or reaction side).
        product: Name of the produced metabolite (or reaction side).

    Returns:
        List of transformation labels in the order they were detected;
        ['generic'] when no heuristic applies.
    """
    substrate_class = identify_metabolite(substrate).get('class', '')
    product_class = identify_metabolite(product).get('class', '')
    substrate_name = substrate.lower()
    product_name = product.lower()

    def mentions_co2(name: str) -> bool:
        # 'carbon dioxide' is the spelling used as a METABOLITE_DATABASE key.
        return 'co2' in name or 'carbon dioxide' in name

    transformations = []

    # Check for oxidation/reduction patterns
    if 'alcohol' in substrate_class and 'carboxylic_acid' in product_class:
        transformations.append('oxidation')
    elif 'aldehyde' in substrate_class and 'alcohol' in product_class:
        transformations.append('reduction')
    elif 'alcohol' in substrate_class and 'aldehyde' in product_class:
        transformations.append('oxidation')

    # Check for phosphorylation/dephosphorylation
    if 'phosphate' in product_name and 'phosphate' not in substrate_name:
        transformations.append('phosphorylation')
    elif 'phosphate' in substrate_name and 'phosphate' not in product_name:
        transformations.append('dephosphorylation')

    # Check for carboxylation/decarboxylation. CO2 appearing only on the
    # product side is being released (decarboxylation); CO2 appearing only
    # on the substrate side is being fixed (carboxylation).
    co2_in_substrate = mentions_co2(substrate_name)
    co2_in_product = mentions_co2(product_name)
    if co2_in_product and not co2_in_substrate:
        transformations.append('decarboxylation')
    elif co2_in_substrate and not co2_in_product:
        transformations.append('carboxylation')

    # Check for hydrolysis (simple heuristic; note that any name containing
    # the letters "ester", e.g. "cholesterol", also matches)
    if 'ester' in substrate_name and (
        'carboxylic_acid' in product_class or 'alcohol' in product_class
    ):
        transformations.append('hydrolysis')

    # Check for transamination
    if 'amino_acid' in product_class and 'amino_acid' not in substrate_class:
        transformations.append('transamination')

    # Default to generic transformation
    if not transformations:
        transformations.append('generic')
    return transformations
def find_enzymes_for_transformation(substrate: str, product: str, limit: int = 10) -> List[Dict[str, Any]]:
    """Find enzymes that may catalyze ``substrate -> product``.

    Candidates are gathered in three tiers and tagged with a confidence:

    * 'high'   - returned by search_enzymes_by_product() and the substrate
                 name occurs in the record's 'reaction' text.
    * 'medium' - returned by search_enzymes_by_substrate(); the product is
                 not verified for these records.
    * 'low'    - generic hits for an inferred transformation type, used
                 only when the first two tiers found nothing.

    Each returned record is the dict produced by the search helper, mutated
    in place with 'transformation', 'substrate', 'product' and 'confidence'.
    Search errors are printed and treated as "no results" (best effort).

    Args:
        substrate: Name of the consumed metabolite.
        product: Name of the produced metabolite.
        limit: Passed to the searches and used as the result cap.

    Returns:
        Up to ``limit`` records, unique per (ec_number, organism), ordered
        high > medium > low confidence.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()

    # Infer transformation types; the first one labels direct hits.
    transformations = infer_transformation_type(substrate, product)
    primary_transformation = transformations[0] if transformations else 'generic'

    def tag(enzyme: Dict[str, Any], transformation: str, confidence: str) -> Dict[str, Any]:
        enzyme['transformation'] = transformation
        enzyme['substrate'] = substrate
        enzyme['product'] = product
        enzyme['confidence'] = confidence
        return enzyme

    all_enzymes = []

    # Tier 1: enzymes known to form the product whose reaction text also
    # mentions the substrate.
    try:
        product_enzymes = search_enzymes_by_product(product, limit=limit)
        substrate_lc = substrate.lower()
        for enzyme in product_enzymes:
            # A missing or None reaction must not abort the whole scan.
            reaction = enzyme.get('reaction') or ''
            if substrate_lc in reaction.lower():
                all_enzymes.append(tag(enzyme, primary_transformation, 'high'))
        time.sleep(0.5)  # Rate limiting
    except Exception as e:
        print(f"Error searching enzymes by product: {e}")

    # Tier 2: enzymes acting on the substrate (product not verified).
    try:
        substrate_enzymes = search_enzymes_by_substrate(substrate, limit=limit)
        for enzyme in substrate_enzymes:
            all_enzymes.append(tag(enzyme, primary_transformation, 'medium'))
        time.sleep(0.5)  # Rate limiting
    except Exception as e:
        print(f"Error searching enzymes by substrate: {e}")

    # Tier 3: generic search per inferred transformation type. The query
    # depends only on the transformation label, so it is issued once per
    # type rather than once per EC prefix.
    # NOTE(review): search_by_pattern is not among the names this file
    # visibly imports from brenda_queries - confirm it is defined; if it is
    # not, this tier always ends in the except branch below.
    if not all_enzymes:
        for trans_type in transformations:
            if trans_type not in COMMON_TRANSFORMATIONS:
                continue
            try:
                generic_enzymes = search_by_pattern(trans_type, limit=5)
                for enzyme in generic_enzymes:
                    all_enzymes.append(tag(enzyme, trans_type, 'low'))
                time.sleep(0.5)
            except Exception as e:
                print(f"Error searching for transformation type {trans_type}: {e}")

    # Remove duplicates, keeping the first (highest-tier) occurrence.
    unique_enzymes = []
    seen = set()
    for enzyme in all_enzymes:
        key = (enzyme.get('ec_number', ''), enzyme.get('organism', ''))
        if key not in seen:
            seen.add(key)
            unique_enzymes.append(enzyme)

    # Sort by confidence (high > medium > low); the sort is stable.
    confidence_order = {'high': 3, 'medium': 2, 'low': 1}
    unique_enzymes.sort(
        key=lambda x: confidence_order.get(x.get('confidence', 'low'), 0),
        reverse=True,
    )
    return unique_enzymes[:limit]
def find_pathway_for_product(product: str, max_steps: int = 3, starting_materials: List[str] = None) -> Dict[str, Any]:
    """Find an enzymatic pathway that synthesizes a target product.

    Performs a breadth-first search backwards from ``product``: for each
    metabolite a fixed list of common precursors is tried, and a precursor
    is kept when find_enzymes_for_transformation() returns enzymes for
    ``precursor -> metabolite``. The search stops at the first metabolite
    that is a starting material.

    Args:
        product: Target metabolite name.
        max_steps: Maximum number of enzymatic steps in a pathway.
        starting_materials: Acceptable starting metabolites; defaults to
            glucose, pyruvate, acetate, ethanol and glycerol.

    Returns:
        Dict with keys 'target', 'max_steps', 'starting_materials',
        'steps' (ordered from starting material to target),
        'alternative_pathways' (every precursor/product pair for which
        enzymes were found), 'warnings' and 'confidence' (0.8 complete
        pathway, 0.3 single-step best guess, 0.1 nothing found). If the
        target itself is a starting material no steps are produced and the
        result is reported as "no pathway found".

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()
    if starting_materials is None:
        # Common starting materials
        starting_materials = ['glucose', 'pyruvate', 'acetate', 'ethanol', 'glycerol']

    pathway = {
        'target': product,
        'max_steps': max_steps,
        'starting_materials': starting_materials,
        'steps': [],
        'alternative_pathways': [],
        'warnings': [],
        'confidence': 0
    }

    from collections import deque

    start_names = {sm.lower() for sm in starting_materials}
    # Simplified precursor pool - real retrosynthesis needs metabolic knowledge.
    common_precursors = ['glucose', 'pyruvate', 'acetate', 'ethanol', 'acetyl-CoA', 'oxaloacetate']

    # Queue items: (current_metabolite, step_count, path). Paths are stored
    # in synthesis order: earliest precursor first, target last.
    queue = deque([(product, 0, [product])])
    visited = set()

    while queue and not pathway['steps']:
        current_metabolite, step_count, current_path = queue.popleft()
        if current_metabolite in visited:
            continue
        visited.add(current_metabolite)

        # Test for a starting material before applying the step limit so
        # that pathways using exactly max_steps steps are still accepted.
        if current_metabolite.lower() in start_names:
            pathway['steps'] = []
            consecutive = zip(current_path, current_path[1:])
            for step_number, (substrate, product_step) in enumerate(consecutive, 1):
                enzymes = find_enzymes_for_transformation(substrate, product_step, limit=5)
                if enzymes:
                    pathway['steps'].append({
                        'step_number': step_number,
                        'substrate': substrate,
                        'product': product_step,
                        'enzymes': enzymes,
                        'transformation': infer_transformation_type(substrate, product_step)
                    })
                else:
                    pathway['warnings'].append(f"No enzymes found for step: {substrate} -> {product_step}")
            pathway['confidence'] = 0.8  # High confidence for found pathway
            break

        # Expanding this node would exceed the allowed number of steps.
        if step_count >= max_steps:
            continue

        # Try common metabolic precursors of the current metabolite.
        possible_substrates = []
        for precursor in common_precursors:
            # A metabolite is not a precursor of itself.
            if precursor.lower() == current_metabolite.lower():
                continue
            enzymes = find_enzymes_for_transformation(precursor, current_metabolite, limit=2)
            if enzymes:
                possible_substrates.append(precursor)
                pathway['alternative_pathways'].append({
                    'precursor': precursor,
                    'product': current_metabolite,
                    'enzymes': enzymes
                })

        # Add found substrates to queue (never revisit a path member).
        for substrate in possible_substrates:
            if substrate not in current_path:
                queue.append((substrate, step_count + 1, [substrate] + current_path))
        time.sleep(0.2)  # Rate limiting

    # If no complete pathway found, create partial pathway
    if not pathway['steps'] and pathway['alternative_pathways']:
        # Best guess: the alternative with the most candidate enzymes.
        best_alternative = max(pathway['alternative_pathways'],
                               key=lambda x: len(x.get('enzymes', [])))
        pathway['steps'] = [{
            'step_number': 1,
            'substrate': best_alternative['precursor'],
            'product': best_alternative['product'],
            'enzymes': best_alternative['enzymes'],
            'transformation': infer_transformation_type(best_alternative['precursor'], best_alternative['product'])
        }]
        pathway['confidence'] = 0.3  # Low confidence for partial pathway
        pathway['warnings'].append("Partial pathway only - complete synthesis route not found")
    elif not pathway['steps']:
        pathway['warnings'].append("No enzymatic pathway found for target product")
        pathway['confidence'] = 0.1
    return pathway
def build_retrosynthetic_tree(target: str, depth: int = 2) -> Dict[str, Any]:
    """Build a retrosynthetic tree for a target molecule.

    Starting at ``target``, each metabolite is expanded with every entry of
    a fixed precursor list for which find_enzymes_for_transformation()
    returns enzymes, down to ``depth`` levels. Above the last level,
    hypothetical precursors named ``precursor_<transformation>_<metabolite>``
    are also added for generic transformation types that have enzyme hits.

    Args:
        target: Target metabolite name (root of the tree).
        depth: Maximum number of levels below the target.

    Returns:
        Dict with 'target', 'depth', 'nodes' (name -> level/children/enzymes),
        'edges' (precursor -> metabolite records), 'alternative_routes'
        (left empty by this function), and the statistics 'total_nodes',
        'total_edges' and 'max_depth'.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()
    tree = {
        'target': target,
        'depth': depth,
        'nodes': {target: {'level': 0, 'children': [], 'enzymes': []}},
        'edges': [],
        'alternative_routes': []
    }

    potential_precursors = ['glucose', 'pyruvate', 'acetate', 'ethanol', 'acetyl-CoA',
                            'oxaloacetate', 'alpha-ketoglutarate', 'malate']
    generic_transformations = ['oxidation', 'reduction', 'hydrolysis', 'carboxylation', 'decarboxylation']

    def build_node_recursive(metabolite: str, current_depth: int) -> None:
        """Expand ``metabolite`` (already present in tree['nodes'])."""
        if current_depth >= depth:
            return

        # Known precursors with at least one candidate enzyme.
        for precursor in potential_precursors:
            # A metabolite is not its own precursor; without this guard the
            # tree gains self-edges and recurses into the same node.
            if precursor.lower() == metabolite.lower():
                continue
            enzymes = find_enzymes_for_transformation(precursor, metabolite, limit=3)
            if not enzymes:
                continue
            # Add precursor as node if not exists
            if precursor not in tree['nodes']:
                tree['nodes'][precursor] = {
                    'level': current_depth + 1,
                    'children': [],
                    'enzymes': enzymes
                }
            tree['nodes'][metabolite]['children'].append(precursor)
            tree['edges'].append({
                'from': precursor,
                'to': metabolite,
                'enzymes': enzymes,
                'transformation': infer_transformation_type(precursor, metabolite)
            })
            # Recursively build tree
            if current_depth + 1 < depth:
                build_node_recursive(precursor, current_depth + 1)

        # Try common metabolic transformations (not on the deepest level).
        # NOTE(review): search_by_pattern is not among the names this file
        # visibly imports from brenda_queries - confirm it is defined; if
        # not, every attempt below ends in the except branch.
        if current_depth < depth - 1:
            for trans in generic_transformations:
                try:
                    generic_enzymes = search_by_pattern(trans, limit=2)
                    if generic_enzymes:
                        # Create hypothetical precursor
                        hypothetical_precursor = f"precursor_{trans}_{metabolite}"
                        tree['nodes'][hypothetical_precursor] = {
                            'level': current_depth + 1,
                            'children': [],
                            'enzymes': generic_enzymes,
                            'hypothetical': True
                        }
                        tree['nodes'][metabolite]['children'].append(hypothetical_precursor)
                        tree['edges'].append({
                            'from': hypothetical_precursor,
                            'to': metabolite,
                            'enzymes': generic_enzymes,
                            'transformation': trans,
                            'hypothetical': True
                        })
                except Exception as e:
                    print(f"Error in retrosynthetic search for {trans}: {e}")
        time.sleep(0.3)  # Rate limiting

    # Start building from target
    build_node_recursive(target, 0)

    # Calculate tree statistics
    tree['total_nodes'] = len(tree['nodes'])
    tree['total_edges'] = len(tree['edges'])
    tree['max_depth'] = max(node['level'] for node in tree['nodes'].values()) if tree['nodes'] else 0
    return tree
def suggest_enzyme_substitutions(ec_number: str, criteria: Dict[str, Any] = None) -> List[Dict[str, Any]]:
    """Suggest alternative enzymes with improved properties.

    Collects candidates from three lookups and scores them:

    * thermophilic homologs (8.0 when the optimal temperature reaches
      ``min_thermostability``, otherwise 6.0),
    * pH-stable variants (7.5),
    * per-organism comparison records with data points (6.5 for preferred
      organisms, otherwise 5.0).

    Lookup errors are printed and that source is skipped (best effort).
    Returned records are the dicts from the lookups, mutated in place with
    'substitution_reason' and 'score'.

    Args:
        ec_number: EC number of the enzyme to replace.
        criteria: Optional selection criteria. Recognised keys are
            'min_thermostability', 'min_ph', 'max_ph' and
            'prefer_organisms'; a partial dict is accepted. When omitted,
            built-in defaults are used (which also contain temperature
            bounds that this function does not read).

    Returns:
        Up to 10 suggestions sorted by descending score.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()
    default_organisms = ['Escherichia coli', 'Saccharomyces cerevisiae', 'Bacillus subtilis']
    if criteria is None:
        criteria = {
            'min_temperature': 30,
            'max_temperature': 70,
            'min_ph': 6.0,
            'max_ph': 8.0,
            'min_thermostability': 40,
            'prefer_organisms': list(default_organisms)
        }
    # A caller-supplied criteria dict may omit the organism preference.
    preferred_organisms = criteria.get('prefer_organisms', default_organisms)
    substitutions = []

    # Get organisms for the target enzyme
    try:
        organisms = compare_across_organisms(ec_number, preferred_organisms) or []
        time.sleep(0.5)
    except Exception as e:
        print(f"Error comparing organisms: {e}")
        organisms = []

    # Find thermophilic homologs if temperature is a criterion
    min_thermostability = criteria.get('min_thermostability')
    if min_thermostability:
        try:
            thermophilic = find_thermophilic_homologs(ec_number, min_thermostability) or []
            time.sleep(0.5)
        except Exception as e:
            print(f"Error finding thermophilic homologs: {e}")
            thermophilic = []
        for enzyme in thermophilic:
            # One malformed record must not discard the remaining ones.
            try:
                optimal_temperature = enzyme['optimal_temperature']
                score = 8.0 if optimal_temperature >= min_thermostability else 6.0
            except (KeyError, TypeError) as e:
                print(f"Error finding thermophilic homologs: {e}")
                continue
            enzyme['substitution_reason'] = f"Thermostable (optimal temp: {optimal_temperature}°C)"
            enzyme['score'] = score
            substitutions.append(enzyme)

    # Find pH-stable variants (0 is treated as a real bound, not "unset")
    min_ph = criteria.get('min_ph')
    max_ph = criteria.get('max_ph')
    if min_ph is not None or max_ph is not None:
        try:
            ph_stable = find_ph_stable_variants(ec_number, min_ph, max_ph) or []
            time.sleep(0.5)
        except Exception as e:
            print(f"Error finding pH-stable variants: {e}")
            ph_stable = []
        for enzyme in ph_stable:
            try:
                reason = f"pH stable ({enzyme['stability_type']} range: {enzyme['ph_range']})"
            except (KeyError, TypeError) as e:
                print(f"Error finding pH-stable variants: {e}")
                continue
            enzyme['substitution_reason'] = reason
            enzyme['score'] = 7.5
            substitutions.append(enzyme)

    # Add organism comparison results
    for org_data in organisms:
        if org_data.get('data_points', 0) > 0:
            org_data['substitution_reason'] = f"Well-characterized in {org_data['organism']}"
            org_data['score'] = 6.5 if org_data['organism'] in preferred_organisms else 5.0
            substitutions.append(org_data)

    # Sort by score
    substitutions.sort(key=lambda x: x.get('score', 0), reverse=True)
    return substitutions[:10]  # Return top 10 suggestions
def calculate_pathway_feasibility(pathway: Dict[str, Any]) -> Dict[str, Any]:
    """Calculate feasibility scores and potential issues for a pathway.

    Each step is scored 0-5: up to 3 points for the number of candidate
    enzymes, up to 2 for 'high' confidence candidates and 1 if any candidate
    comes from a listed industrial organism. Enzyme availability is the
    mean step score normalised to 0-1. Thermodynamic feasibility is a
    length-based heuristic (0.8 for <= 2 steps, 0.6 for <= 4, else 0.4).
    The overall score is ``0.6 * availability + 0.4 * thermodynamic``.

    Args:
        pathway: Pathway dict; 'steps' is read, and each step's 'enzymes',
            'substrate' and 'product'.

    Returns:
        Dict with 'overall_score', 'step_scores', 'warnings',
        'recommendations', 'thermodynamic_feasibility',
        'enzyme_availability', 'cofactor_requirements' (unique by name) and
        'optimal_conditions' (left empty by this function). For a pathway
        without steps only a warning and an overall score of 0.1 are set.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()
    feasibility = {
        'overall_score': 0,
        'step_scores': [],
        'warnings': [],
        'recommendations': [],
        'thermodynamic_feasibility': 0,
        'enzyme_availability': 0,
        'cofactor_requirements': [],
        'optimal_conditions': {}
    }
    if not pathway.get('steps'):
        feasibility['warnings'].append("No steps in pathway")
        feasibility['overall_score'] = 0.1
        return feasibility

    industrial_organisms = ['Escherichia coli', 'Saccharomyces cerevisiae', 'Bacillus subtilis']
    seen_cofactor_names = set()
    queried_ec_numbers = set()
    total_score = 0
    step_scores = []

    for step in pathway['steps']:
        enzymes = step.get('enzymes', [])

        # Score based on number of available enzymes
        if len(enzymes) >= 3:
            step_score = 3  # Multiple enzyme options
        elif enzymes:
            step_score = 2  # At least one enzyme
        else:
            step_score = 0  # No enzymes
            feasibility['warnings'].append(f"No enzymes found for step: {step['substrate']} -> {step['product']}")

        if enzymes:
            # Score based on enzyme confidence (max 2 points)
            high_confidence = sum(1 for e in enzymes if e.get('confidence') == 'high')
            step_score += min(high_confidence, 2)
            # Check for industrial viability
            if any(e.get('organism') in industrial_organisms for e in enzymes):
                step_score += 1

        # Cap step score at 5
        step_score = min(step_score, 5)
        step_scores.append(step_score)
        total_score += step_score

        # Analyze cofactor requirements. Each lookup is guarded on its own
        # so one failing EC number does not skip the step's other enzymes;
        # an EC number is queried once because results are deduplicated by
        # cofactor name anyway.
        for enzyme in enzymes:
            ec_number = enzyme.get('ec_number', '')
            if not ec_number or ec_number in queried_ec_numbers:
                continue
            queried_ec_numbers.add(ec_number)
            try:
                cofactors = get_cofactor_requirements(ec_number)
                for cofactor in cofactors:
                    name = cofactor['name']
                    if name not in seen_cofactor_names:
                        seen_cofactor_names.add(name)
                        feasibility['cofactor_requirements'].append(cofactor)
                time.sleep(0.3)
            except Exception as e:
                print(f"Error analyzing cofactors: {e}")

    feasibility['step_scores'] = step_scores
    feasibility['enzyme_availability'] = total_score / (len(step_scores) * 5)  # Normalize to 0-1

    # Thermodynamic feasibility (simplified heuristic)
    pathway_length = len(pathway['steps'])
    if pathway_length <= 2:
        feasibility['thermodynamic_feasibility'] = 0.8  # Short pathways are often feasible
    elif pathway_length <= 4:
        feasibility['thermodynamic_feasibility'] = 0.6
    else:
        feasibility['thermodynamic_feasibility'] = 0.4  # Long pathways may have thermodynamic issues

    # Overall feasibility is weighted combination
    feasibility['overall_score'] = (
        feasibility['enzyme_availability'] * 0.6 +
        feasibility['thermodynamic_feasibility'] * 0.4
    )

    # Generate recommendations
    if feasibility['overall_score'] < 0.3:
        feasibility['warnings'].append("Low overall pathway feasibility")
        feasibility['recommendations'].append("Consider alternative starting materials or target molecules")
    elif feasibility['overall_score'] < 0.6:
        feasibility['warnings'].append("Moderate pathway feasibility")
        feasibility['recommendations'].append("Consider enzyme engineering or cofactor recycling")
    if feasibility['cofactor_requirements']:
        feasibility['recommendations'].append("Implement cofactor recycling system for: " +
                                              ", ".join([c['name'] for c in feasibility['cofactor_requirements']]))
    return feasibility
def optimize_pathway_conditions(pathway: Dict[str, Any]) -> Dict[str, Any]:
    """Suggest optimal conditions for the entire pathway.

    Queries get_environmental_parameters() for every enzyme that has an EC
    number, averages the reported optimal temperatures and pH values, ranks
    source organisms by how many enzyme records they contribute, and groups
    the cofactors reported by get_cofactor_requirements() by type. Lookup
    errors are printed and the affected enzyme is skipped.

    Args:
        pathway: Pathway dict; 'steps' and each step's 'enzymes' are read.

    Returns:
        Dict with 'optimal_temperature', 'optimal_ph', 'temperature_range',
        'ph_range', 'cofactor_system' (cofactor type -> list of names),
        'organism_compatibility' (top 5 organisms) and
        'process_recommendations'. Defaults (30.0 C, pH 7.0, ranges 20-40
        and 6.5-7.5) are kept when no data is found.

    Raises:
        ImportError: If a required dependency is missing.
    """
    validate_dependencies()
    result = {
        'optimal_temperature': 30.0,  # Default
        'optimal_ph': 7.0,  # Default
        'temperature_range': (20, 40),  # Default
        'ph_range': (6.5, 7.5),  # Default
        'cofactor_system': [],
        'organism_compatibility': {},
        'process_recommendations': []
    }
    all_temps = []
    all_phs = []
    per_organism = {}
    steps = pathway.get('steps', [])

    # Pass 1: environmental optima, overall and per organism.
    for step in steps:
        for enzyme in step.get('enzymes', []):
            ec_number = enzyme.get('ec_number', '')
            organism = enzyme.get('organism', '')
            if not ec_number:
                continue
            try:
                env_params = get_environmental_parameters(ec_number)
                time.sleep(0.3)
                has_temp = env_params.get('optimal_temperature')
                if has_temp:
                    all_temps.append(env_params['optimal_temperature'])
                has_ph = env_params.get('optimal_ph')
                if has_ph:
                    all_phs.append(env_params['optimal_ph'])
                # Track organism preferences
                record = per_organism.setdefault(organism, {
                    'temperature_optima': [],
                    'ph_optima': [],
                    'step_count': 0
                })
                record['step_count'] += 1
                if has_temp:
                    record['temperature_optima'].append(env_params['optimal_temperature'])
                if has_ph:
                    record['ph_optima'].append(env_params['optimal_ph'])
            except Exception as e:
                print(f"Error getting environmental parameters for {ec_number}: {e}")

    # Overall optimum is the mean; the range is min/max with a margin.
    if all_temps:
        result['optimal_temperature'] = sum(all_temps) / len(all_temps)
        result['temperature_range'] = (min(all_temps) - 5, max(all_temps) + 5)
    if all_phs:
        result['optimal_ph'] = sum(all_phs) / len(all_phs)
        result['ph_range'] = (min(all_phs) - 0.5, max(all_phs) + 0.5)

    # Only organisms with both temperature and pH data receive a score.
    for record in per_organism.values():
        temps = record['temperature_optima']
        phs = record['ph_optima']
        if temps and phs:
            record['avg_temp'] = sum(temps) / len(temps)
            record['avg_ph'] = sum(phs) / len(phs)
            record['compatibility_score'] = record['step_count']

    # Sort organisms by compatibility and keep the top 5.
    scored = [
        (name, record)
        for name, record in per_organism.items()
        if record.get('compatibility_score', 0) > 0
    ]
    ranked = sorted(scored, key=lambda item: item[1]['compatibility_score'], reverse=True)
    result['organism_compatibility'] = dict(ranked[:5])

    # Generate process recommendations
    recommendations = result['process_recommendations']
    if len(result['organism_compatibility']) > 1:
        recommendations.append("Consider multi-organism system or enzyme cocktails")
    temp_low, temp_high = result['temperature_range']
    if temp_high - temp_low > 30:
        recommendations.append("Consider temperature gradient or staged process")
    ph_low, ph_high = result['ph_range']
    if ph_high - ph_low > 2:
        recommendations.append("Consider pH control system or buffer optimization")

    # Pass 2: cofactor names grouped by cofactor type.
    by_type = {}
    for step in steps:
        for enzyme in step.get('enzymes', []):
            ec_number = enzyme.get('ec_number', '')
            if not ec_number:
                continue
            try:
                for cofactor in get_cofactor_requirements(ec_number):
                    names = by_type.setdefault(cofactor.get('type', 'other'), [])
                    if cofactor['name'] not in names:
                        names.append(cofactor['name'])
                time.sleep(0.3)
            except Exception as e:
                print(f"Error getting cofactors for {ec_number}: {e}")
    result['cofactor_system'] = by_type
    return result
def generate_pathway_report(pathway: Dict[str, Any], filename: str = None) -> str:
    """Generate a comprehensive plain-text pathway report and save it.

    Runs calculate_pathway_feasibility() and optimize_pathway_conditions()
    on the pathway and writes sections for the pathway steps, cofactor
    requirements, optimal conditions, warnings, recommendations,
    alternative routes and the feasibility analysis.

    Args:
        pathway: Pathway dict; 'target', 'steps' and 'alternative_pathways'
            are read.
        filename: Output path. Defaults to
            ``pathway_report_<target>.txt`` with spaces in the target
            replaced by underscores and lower-cased.

    Returns:
        The path of the written report file.

    Raises:
        ImportError: If a required dependency is missing.
        OSError: If the report file cannot be written.
    """
    validate_dependencies()
    if filename is None:
        target_name = pathway.get('target', 'pathway').replace(' ', '_').lower()
        filename = f"pathway_report_{target_name}.txt"

    # Calculate feasibility and optimization
    feasibility = calculate_pathway_feasibility(pathway)
    optimization = optimize_pathway_conditions(pathway)

    banner = "=" * 80
    rule = "=" * 40

    def section(title: str) -> None:
        report.extend(["\n" + rule, title, rule])

    report = [banner, "ENZYMATIC PATHWAY REPORT", banner]

    # Overview
    report.append(f"\nTARGET PRODUCT: {pathway.get('target', 'Unknown')}")
    report.append(f"PATHWAY LENGTH: {len(pathway.get('steps', []))} steps")
    report.append(f"OVERALL FEASIBILITY: {feasibility['overall_score']:.2f}/1.00")

    # Pathway steps
    if pathway.get('steps'):
        section("PATHWAY STEPS")
        step_scores = feasibility.get('step_scores') or []
        for i, step in enumerate(pathway['steps'], 1):
            report.append(f"\nStep {i}: {step['substrate']} -> {step['product']}")
            report.append(f"Transformation: {', '.join(step.get('transformation', ['Unknown']))}")
            enzymes = step.get('enzymes')
            if enzymes:
                report.append(f"Available enzymes: {len(enzymes)}")
                for j, enzyme in enumerate(enzymes[:3], 1):  # Top 3 enzymes
                    report.append(f" {j}. EC {enzyme.get('ec_number', 'Unknown')} - {enzyme.get('organism', 'Unknown')}")
                    report.append(f" Confidence: {enzyme.get('confidence', 'Unknown')}")
                    if enzyme.get('reaction'):
                        report.append(f" Reaction: {enzyme['reaction'][:100]}...")
                if len(enzymes) > 3:
                    report.append(f" ... and {len(enzymes) - 3} additional enzymes")
            else:
                report.append(" No enzymes found for this step")
            if i - 1 < len(step_scores):
                report.append(f"Step feasibility score: {step_scores[i-1]}/5.0")

    # Cofactor requirements
    if feasibility.get('cofactor_requirements'):
        section("COFACTOR REQUIREMENTS")
        for cofactor in feasibility['cofactor_requirements']:
            report.append(f"- {cofactor['name']} ({cofactor.get('type', 'Unknown')})")
            report.append(f" Organism: {cofactor.get('organism', 'Unknown')}")
            report.append(f" EC Number: {cofactor.get('ec_number', 'Unknown')}")

    # Optimal conditions
    section("OPTIMAL CONDITIONS")
    report.append(f"Temperature: {optimization['optimal_temperature']:.1f}°C")
    report.append(f"pH: {optimization['optimal_ph']:.1f}")
    report.append(f"Temperature range: {optimization['temperature_range'][0]:.1f} - {optimization['temperature_range'][1]:.1f}°C")
    report.append(f"pH range: {optimization['ph_range'][0]:.1f} - {optimization['ph_range'][1]:.1f}")
    if optimization.get('organism_compatibility'):
        report.append("\nCompatible organisms (by preference):")
        for organism, data in list(optimization['organism_compatibility'].items())[:3]:
            report.append(f"- {organism} (compatibility score: {data.get('compatibility_score', 0)})")
            if data.get('avg_temp'):
                report.append(f" Optimal temperature: {data['avg_temp']:.1f}°C")
            if data.get('avg_ph'):
                report.append(f" Optimal pH: {data['avg_ph']:.1f}")

    # Warnings and recommendations
    if feasibility.get('warnings'):
        section("WARNINGS")
        for warning in feasibility['warnings']:
            report.append(f"⚠️ {warning}")

    # The section is emitted when either source has entries, so process
    # recommendations are never dropped or printed without a header.
    recommendations = feasibility.get('recommendations') or []
    process_recommendations = optimization.get('process_recommendations') or []
    if recommendations or process_recommendations:
        section("RECOMMENDATIONS")
        for rec in recommendations:
            report.append(f"💡 {rec}")
        for rec in process_recommendations:
            report.append(f"🔧 {rec}")

    # Alternative pathways
    if pathway.get('alternative_pathways'):
        section("ALTERNATIVE ROUTES")
        for alt in pathway['alternative_pathways'][:5]:  # Top 5 alternatives
            report.append(f"\n{alt['precursor']} -> {alt['product']}")
            report.append(f"Enzymes available: {len(alt.get('enzymes', []))}")
            for enzyme in alt.get('enzymes', [])[:2]:  # Top 2 enzymes
                report.append(f" - {enzyme.get('ec_number', 'Unknown')} ({enzyme.get('organism', 'Unknown')})")

    # Feasibility analysis
    section("FEASIBILITY ANALYSIS")
    report.append(f"Enzyme availability score: {feasibility['enzyme_availability']:.2f}/1.00")
    report.append(f"Thermodynamic feasibility: {feasibility['thermodynamic_feasibility']:.2f}/1.00")

    # Write report to file. The text contains non-ASCII characters (degree
    # sign, emoji), so the encoding is fixed instead of relying on the
    # platform default, which cannot encode them on some systems.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write('\n'.join(report))
    print(f"Pathway report saved to {filename}")
    return filename
def visualize_pathway(pathway: Dict[str, Any], save_path: Optional[str] = None) -> str:
    """Draw the pathway as a directed graph of compounds linked by enzymatic steps.

    Each entry in ``pathway['steps']`` contributes one edge from its
    ``'substrate'`` to its ``'product'``. The edge label shows how many
    candidate enzymes the step lists and, when there is at least one, the
    EC number of the first.

    Nodes are coloured by their position in the finished graph, not by the
    role they played in whichever step mentioned them last:

    * no incoming edge            -> starting material (light blue)
    * incoming and outgoing edges -> intermediate (light yellow)
    * incoming edge only          -> product (light green)

    Args:
        pathway: Pathway dictionary. Reads ``'steps'`` (a list of dicts with
            ``'substrate'``, ``'product'`` and optional ``'enzymes'``) and the
            optional ``'target'`` used in the plot title.
        save_path: File to write the image to. When omitted, the figure is
            displayed with ``plt.show()`` instead of being saved.

    Returns:
        ``save_path`` when given, otherwise the placeholder name
        ``"pathway_visualization.png"``. The same value is returned when the
        plotting libraries are missing or drawing fails, so a returned name
        does not guarantee that a file was written.
    """
    validate_dependencies()
    output_name = save_path or "pathway_visualization.png"

    if not NETWORKX_AVAILABLE or not MATPLOTLIB_AVAILABLE:
        print("networkx and matplotlib required for pathway visualization")
        return output_name

    fig = None
    try:
        # Create directed graph; add_edge registers both endpoints as nodes.
        G = nx.DiGraph()
        for step in pathway.get('steps', []):
            substrate = step['substrate']
            product = step['product']
            enzymes = step.get('enzymes', [])

            # Add edge with enzyme information
            edge_label = f"{len(enzymes)} enzymes"
            if enzymes:
                primary_ec = enzymes[0].get('ec_number', 'Unknown')
                edge_label += f"\nEC {primary_ec}"
            G.add_edge(substrate, product, label=edge_label)

        # Create figure
        fig = plt.figure(figsize=(12, 8))

        # Layout
        pos = nx.spring_layout(G, k=2, iterations=50)

        # Classify nodes from graph topology. Tagging nodes while adding steps
        # does not work: a compound produced by one step and consumed by the
        # next would keep only the last tag, so no node would ever be drawn
        # as an intermediate.
        substrate_nodes = [n for n in G.nodes() if G.in_degree(n) == 0]
        product_nodes = [
            n for n in G.nodes()
            if G.in_degree(n) > 0 and G.out_degree(n) == 0
        ]
        intermediate_nodes = [
            n for n in G.nodes()
            if G.in_degree(n) > 0 and G.out_degree(n) > 0
        ]

        # Draw nodes
        nx.draw_networkx_nodes(G, pos, nodelist=substrate_nodes, node_color='lightblue', node_size=1500)
        nx.draw_networkx_nodes(G, pos, nodelist=product_nodes, node_color='lightgreen', node_size=1500)
        nx.draw_networkx_nodes(G, pos, nodelist=intermediate_nodes, node_color='lightyellow', node_size=1200)

        # Draw edges
        nx.draw_networkx_edges(G, pos, edge_color='gray', arrows=True, arrowsize=20)

        # Draw labels
        nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold')

        # Draw edge labels
        edge_labels = nx.get_edge_attributes(G, 'label')
        nx.draw_networkx_edge_labels(G, pos, edge_labels, font_size=8)

        # Add title
        plt.title(f"Enzymatic Pathway to {pathway.get('target', 'Target')}", fontsize=14, fontweight='bold')

        # Add legend (empty scatters act as colour swatches)
        plt.scatter([], [], c='lightblue', s=150, label='Starting Materials')
        plt.scatter([], [], c='lightyellow', s=120, label='Intermediates')
        plt.scatter([], [], c='lightgreen', s=150, label='Products')
        plt.legend()
        plt.axis('off')
        plt.tight_layout()

        # Save or show
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Pathway visualization saved to {save_path}")
        else:
            plt.show()
    except Exception as e:
        # Best-effort visualisation: report the problem and still hand back
        # the expected file name so callers are not interrupted.
        print(f"Error visualizing pathway: {e}")
    finally:
        # Release the figure even when drawing or saving failed.
        if fig is not None:
            plt.close(fig)

    return output_name
if __name__ == "__main__":
    # Demonstration run of the module's public helpers. The examples share
    # one try block, so the first failure is reported and the rest are skipped.
    print("Enzyme Pathway Builder Examples")
    print("=" * 50)
    try:
        # Example 1: search for a pathway that yields lactate
        print("\n1. Finding pathway for lactate production:")
        lactate_pathway = find_pathway_for_product("lactate", max_steps=3)
        step_count = len(lactate_pathway['steps'])
        print(f"Found pathway with {step_count} steps")
        confidence = lactate_pathway['confidence']
        print(f"Feasibility: {confidence:.2f}")

        # Example 2: retrosynthetic tree rooted at ethanol
        print("\n2. Building retrosynthetic tree for ethanol:")
        ethanol_tree = build_retrosynthetic_tree("ethanol", depth=2)
        node_total = ethanol_tree['total_nodes']
        edge_total = ethanol_tree['total_edges']
        print(f"Tree has {node_total} nodes and {edge_total} edges")

        # Example 3: alternative enzymes for EC 1.1.1.1
        print("\n3. Suggesting enzyme substitutions for alcohol dehydrogenase:")
        candidates = suggest_enzyme_substitutions("1.1.1.1")
        top_candidates = candidates[:3]
        for candidate in top_candidates:
            organism = candidate.get('organism', 'Unknown')
            reason = candidate.get('substitution_reason', 'No reason')
            print(f" - {organism}: {reason}")

        # Example 4: feasibility scoring of the lactate pathway
        print("\n4. Calculating pathway feasibility:")
        scores = calculate_pathway_feasibility(lactate_pathway)
        overall = scores['overall_score']
        print(f"Overall score: {overall:.2f}")
        warning_count = len(scores['warnings'])
        print(f"Warnings: {warning_count}")

        # Example 5: text report
        print("\n5. Generating pathway report:")
        report_path = generate_pathway_report(lactate_pathway)
        print(f"Report saved to: {report_path}")

        # Example 6: graph image
        print("\n6. Visualizing pathway:")
        image_path = visualize_pathway(lactate_pathway, "example_pathway.png")
        print(f"Visualization saved to: {image_path}")
    except Exception as e:
        print(f"Example failed: {e}")