mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-01-26 16:58:56 +08:00
- Add comprehensive BRENDA database skill with API integration
- Include enzyme data retrieval, pathway analysis, and visualization
- Support for enzyme queries, kinetic parameters, and taxonomy data
- Add visualization scripts for enzyme pathways and kinetics
1053 lines
44 KiB
Python
1053 lines
44 KiB
Python
"""
|
|
Enzyme Pathway Builder for Retrosynthetic Analysis
|
|
|
|
This module provides tools for constructing enzymatic pathways and
|
|
retrosynthetic trees using BRENDA database information.
|
|
|
|
Key features:
|
|
- Find enzymatic pathways for target products
|
|
- Build retrosynthetic trees from products
|
|
- Suggest enzyme substitutions and alternatives
|
|
- Calculate pathway feasibility and thermodynamics
|
|
- Optimize pathway conditions (pH, temperature, cofactors)
|
|
- Generate detailed pathway reports
|
|
- Support for metabolic engineering and synthetic biology
|
|
|
|
Installation:
|
|
uv pip install networkx matplotlib pandas
|
|
|
|
Usage:
|
|
from scripts.enzyme_pathway_builder import find_pathway_for_product, build_retrosynthetic_tree
|
|
|
|
pathway = find_pathway_for_product("lactate", max_steps=3)
|
|
tree = build_retrosynthetic_tree("lactate", depth=2)
|
|
"""
|
|
|
|
import re
|
|
import json
|
|
import time
|
|
from typing import List, Dict, Any, Optional, Set, Tuple
|
|
from pathlib import Path
|
|
|
|
try:
|
|
import networkx as nx
|
|
NETWORKX_AVAILABLE = True
|
|
except ImportError:
|
|
print("Warning: networkx not installed. Install with: uv pip install networkx")
|
|
NETWORKX_AVAILABLE = False
|
|
|
|
try:
|
|
import pandas as pd
|
|
PANDAS_AVAILABLE = True
|
|
except ImportError:
|
|
print("Warning: pandas not installed. Install with: uv pip install pandas")
|
|
PANDAS_AVAILABLE = False
|
|
|
|
try:
|
|
import matplotlib.pyplot as plt
|
|
MATPLOTLIB_AVAILABLE = True
|
|
except ImportError:
|
|
print("Warning: matplotlib not installed. Install with: uv pip install matplotlib")
|
|
MATPLOTLIB_AVAILABLE = False
|
|
|
|
try:
|
|
from brenda_queries import (
|
|
search_enzymes_by_product, search_enzymes_by_substrate,
|
|
get_environmental_parameters, compare_across_organisms,
|
|
get_substrate_specificity, get_cofactor_requirements,
|
|
find_thermophilic_homologs, find_ph_stable_variants
|
|
)
|
|
BRENDA_QUERIES_AVAILABLE = True
|
|
except ImportError:
|
|
print("Warning: brenda_queries not available")
|
|
BRENDA_QUERIES_AVAILABLE = False
|
|
|
|
|
|
def validate_dependencies():
|
|
"""Validate that required dependencies are installed."""
|
|
missing = []
|
|
if not NETWORKX_AVAILABLE:
|
|
missing.append("networkx")
|
|
if not PANDAS_AVAILABLE:
|
|
missing.append("pandas")
|
|
if not BRENDA_QUERIES_AVAILABLE:
|
|
missing.append("brenda_queries")
|
|
if missing:
|
|
raise ImportError(f"Missing required dependencies: {', '.join(missing)}")
|
|
|
|
|
|
# Common biochemical transformations with typical EC numbers
|
|
COMMON_TRANSFORMATIONS = {
|
|
'oxidation': ['1.1.1'], # Alcohol dehydrogenases
|
|
'reduction': ['1.1.1'], # Alcohol dehydrogenases
|
|
'hydrolysis': ['3.1.1', '3.1.3'], # Esterases, phosphatases
|
|
'carboxylation': ['6.4.1'], # Carboxylases
|
|
'decarboxylation': ['4.1.1'], # Decarboxylases
|
|
'transamination': ['2.6.1'], # Aminotransferases
|
|
'phosphorylation': ['2.7.1'], # Kinases
|
|
'dephosphorylation': ['3.1.3'], # Phosphatases
|
|
'isomerization': ['5.1.1', '5.3.1'], # Isomerases
|
|
'ligation': ['6.3.1'], # Ligases
|
|
'transfer': ['2.1.1', '2.2.1', '2.4.1'], # Transferases
|
|
'hydride_transfer': ['1.1.1', '1.2.1'], # Oxidoreductases
|
|
'group_transfer': ['2.1.1'], # Methyltransferases
|
|
}
|
|
|
|
# Simple metabolite database (expanded for pathway building)
|
|
METABOLITE_DATABASE = {
|
|
# Primary metabolites
|
|
'glucose': {'formula': 'C6H12O6', 'mw': 180.16, 'class': 'sugar'},
|
|
'fructose': {'formula': 'C6H12O6', 'mw': 180.16, 'class': 'sugar'},
|
|
'galactose': {'formula': 'C6H12O6', 'mw': 180.16, 'class': 'sugar'},
|
|
'pyruvate': {'formula': 'C3H4O3', 'mw': 90.08, 'class': 'carboxylic_acid'},
|
|
'lactate': {'formula': 'C3H6O3', 'mw': 90.08, 'class': 'carboxylic_acid'},
|
|
'acetate': {'formula': 'C2H4O2', 'mw': 60.05, 'class': 'carboxylic_acid'},
|
|
'ethanol': {'formula': 'C2H6O', 'mw': 46.07, 'class': 'alcohol'},
|
|
'acetaldehyde': {'formula': 'C2H4O', 'mw': 44.05, 'class': 'aldehyde'},
|
|
'acetone': {'formula': 'C3H6O', 'mw': 58.08, 'class': 'ketone'},
|
|
'glycerol': {'formula': 'C3H8O3', 'mw': 92.09, 'class': 'alcohol'},
|
|
'ammonia': {'formula': 'NH3', 'mw': 17.03, 'class': 'inorganic'},
|
|
'carbon dioxide': {'formula': 'CO2', 'mw': 44.01, 'class': 'inorganic'},
|
|
'water': {'formula': 'H2O', 'mw': 18.02, 'class': 'inorganic'},
|
|
'oxygen': {'formula': 'O2', 'mw': 32.00, 'class': 'inorganic'},
|
|
'hydrogen': {'formula': 'H2', 'mw': 2.02, 'class': 'inorganic'},
|
|
'nitrogen': {'formula': 'N2', 'mw': 28.01, 'class': 'inorganic'},
|
|
'phosphate': {'formula': 'PO4', 'mw': 94.97, 'class': 'inorganic'},
|
|
'sulfate': {'formula': 'SO4', 'mw': 96.06, 'class': 'inorganic'},
|
|
|
|
# Amino acids
|
|
'alanine': {'formula': 'C3H7NO2', 'mw': 89.09, 'class': 'amino_acid'},
|
|
'glycine': {'formula': 'C2H5NO2', 'mw': 75.07, 'class': 'amino_acid'},
|
|
'serine': {'formula': 'C3H7NO3', 'mw': 105.09, 'class': 'amino_acid'},
|
|
'threonine': {'formula': 'C4H9NO3', 'mw': 119.12, 'class': 'amino_acid'},
|
|
'aspartate': {'formula': 'C4H7NO4', 'mw': 133.10, 'class': 'amino_acid'},
|
|
'glutamate': {'formula': 'C5H9NO4', 'mw': 147.13, 'class': 'amino_acid'},
|
|
'asparagine': {'formula': 'C4H8N2O3', 'mw': 132.12, 'class': 'amino_acid'},
|
|
'glutamine': {'formula': 'C5H10N2O3', 'mw': 146.15, 'class': 'amino_acid'},
|
|
'lysine': {'formula': 'C6H14N2O2', 'mw': 146.19, 'class': 'amino_acid'},
|
|
'arginine': {'formula': 'C6H14N4O2', 'mw': 174.20, 'class': 'amino_acid'},
|
|
'histidine': {'formula': 'C6H9N3O2', 'mw': 155.16, 'class': 'amino_acid'},
|
|
'phenylalanine': {'formula': 'C9H11NO2', 'mw': 165.19, 'class': 'amino_acid'},
|
|
'tyrosine': {'formula': 'C9H11NO3', 'mw': 181.19, 'class': 'amino_acid'},
|
|
'tryptophan': {'formula': 'C11H12N2O2', 'mw': 204.23, 'class': 'amino_acid'},
|
|
'leucine': {'formula': 'C6H13NO2', 'mw': 131.18, 'class': 'amino_acid'},
|
|
'isoleucine': {'formula': 'C6H13NO2', 'mw': 131.18, 'class': 'amino_acid'},
|
|
'valine': {'formula': 'C5H11NO2', 'mw': 117.15, 'class': 'amino_acid'},
|
|
'methionine': {'formula': 'C5H11NO2S', 'mw': 149.21, 'class': 'amino_acid'},
|
|
'cysteine': {'formula': 'C3H7NO2S', 'mw': 121.16, 'class': 'amino_acid'},
|
|
'proline': {'formula': 'C5H9NO2', 'mw': 115.13, 'class': 'amino_acid'},
|
|
|
|
# Nucleotides (simplified)
|
|
'atp': {'formula': 'C10H16N5O13P3', 'mw': 507.18, 'class': 'nucleotide'},
|
|
'adp': {'formula': 'C10H15N5O10P2', 'mw': 427.20, 'class': 'nucleotide'},
|
|
'amp': {'formula': 'C10H14N5O7P', 'mw': 347.22, 'class': 'nucleotide'},
|
|
'nad': {'formula': 'C21H27N7O14P2', 'mw': 663.43, 'class': 'cofactor'},
|
|
'nadh': {'formula': 'C21H29N7O14P2', 'mw': 665.44, 'class': 'cofactor'},
|
|
'nadp': {'formula': 'C21H28N7O17P3', 'mw': 743.44, 'class': 'cofactor'},
|
|
'nadph': {'formula': 'C21H30N7O17P3', 'mw': 745.45, 'class': 'cofactor'},
|
|
'fadh2': {'formula': 'C21H30N7O14P2', 'mw': 785.55, 'class': 'cofactor'},
|
|
'fadx': {'formula': 'C21H20N4O2', 'mw': 350.36, 'class': 'cofactor'},
|
|
|
|
# Common organic acids
|
|
'malate': {'formula': 'C4H6O5', 'mw': 134.09, 'class': 'carboxylic_acid'},
|
|
'oxaloacetate': {'formula': 'C4H4O5', 'mw': 132.07, 'class': 'carboxylic_acid'},
|
|
'succinate': {'formula': 'C4H6O4', 'mw': 118.09, 'class': 'carboxylic_acid'},
|
|
'fumarate': {'formula': 'C4H4O4', 'mw': 116.07, 'class': 'carboxylic_acid'},
|
|
'oxalosuccinate': {'formula': 'C6H6O7', 'mw': 190.12, 'class': 'carboxylic_acid'},
|
|
'alpha-ketoglutarate': {'formula': 'C5H6O5', 'mw': 146.11, 'class': 'carboxylic_acid'},
|
|
|
|
# Energy carriers
|
|
'acetyl-coa': {'formula': 'C23H38N7O17P3S', 'mw': 809.51, 'class': 'cofactor'},
|
|
'coenzyme-a': {'formula': 'C21H36N7O16P3S', 'mw': 767.54, 'class': 'cofactor'},
|
|
}
|
|
|
|
# Common cofactors and their roles
|
|
COFACTOR_ROLES = {
|
|
'nad+': {'role': 'oxidation', 'oxidation_state': '+1'},
|
|
'nadh': {'role': 'reduction', 'oxidation_state': '0'},
|
|
'nadp+': {'role': 'oxidation', 'oxidation_state': '+1'},
|
|
'nadph': {'role': 'reduction', 'oxidation_state': '0'},
|
|
'fadx': {'role': 'oxidation', 'oxidation_state': '0'},
|
|
'fadh2': {'role': 'reduction', 'oxidation_state': '-2'},
|
|
'atp': {'role': 'phosphorylation', 'oxidation_state': '0'},
|
|
'adp': {'role': 'energy', 'oxidation_state': '0'},
|
|
'amp': {'role': 'energy', 'oxidation_state': '0'},
|
|
'acetyl-coa': {'role': 'acetylation', 'oxidation_state': '0'},
|
|
'coenzyme-a': {'role': 'thiolation', 'oxidation_state': '0'},
|
|
}
|
|
|
|
|
|
def identify_metabolite(metabolite_name: str) -> Dict[str, Any]:
|
|
"""Identify a metabolite from the database or create entry."""
|
|
metabolite_name = metabolite_name.lower().strip()
|
|
|
|
# Check if it's in the database
|
|
if metabolite_name in METABOLITE_DATABASE:
|
|
return {'name': metabolite_name, **METABOLITE_DATABASE[metabolite_name]}
|
|
|
|
# Simple formula extraction from common patterns
|
|
formula_patterns = {
|
|
r'c(\d+)h(\d+)o(\d+)': lambda m: f"C{m[0]}H{m[1]}O{m[2]}",
|
|
r'c(\d+)h(\d+)n(\d+)o(\d+)': lambda m: f"C{m[0]}H{m[1]}N{m[2]}O{m[3]}",
|
|
}
|
|
|
|
for pattern, formatter in formula_patterns.items():
|
|
match = re.search(pattern, metabolite_name)
|
|
if match:
|
|
formula = formatter(match.groups())
|
|
# Estimate molecular weight (C=12, H=1, N=14, O=16)
|
|
mw = 0
|
|
elements = re.findall(r'([A-Z])(\d*)', formula)
|
|
for elem, count in elements:
|
|
count = int(count) if count else 1
|
|
if elem == 'C':
|
|
mw += count * 12.01
|
|
elif elem == 'H':
|
|
mw += count * 1.008
|
|
elif elem == 'N':
|
|
mw += count * 14.01
|
|
elif elem == 'O':
|
|
mw += count * 16.00
|
|
elif elem == 'P':
|
|
mw += count * 30.97
|
|
elif elem == 'S':
|
|
mw += count * 32.07
|
|
|
|
return {
|
|
'name': metabolite_name,
|
|
'formula': formula,
|
|
'mw': mw,
|
|
'class': 'unknown'
|
|
}
|
|
|
|
# Fallback - unknown metabolite
|
|
return {
|
|
'name': metabolite_name,
|
|
'formula': 'Unknown',
|
|
'mw': 0,
|
|
'class': 'unknown'
|
|
}
|
|
|
|
|
|
def infer_transformation_type(substrate: str, product: str) -> List[str]:
|
|
"""Infer the type of transformation based on substrate and product."""
|
|
substrate_info = identify_metabolite(substrate)
|
|
product_info = identify_metabolite(product)
|
|
|
|
transformations = []
|
|
|
|
# Check for oxidation/reduction patterns
|
|
if 'alcohol' in substrate_info.get('class', '') and 'carboxylic_acid' in product_info.get('class', ''):
|
|
transformations.append('oxidation')
|
|
elif 'aldehyde' in substrate_info.get('class', '') and 'alcohol' in product_info.get('class', ''):
|
|
transformations.append('reduction')
|
|
elif 'alcohol' in substrate_info.get('class', '') and 'aldehyde' in product_info.get('class', ''):
|
|
transformations.append('oxidation')
|
|
|
|
# Check for phosphorylation/dephosphorylation
|
|
if 'phosphate' in product and 'phosphate' not in substrate:
|
|
transformations.append('phosphorylation')
|
|
elif 'phosphate' in substrate and 'phosphate' not in product:
|
|
transformations.append('dephosphorylation')
|
|
|
|
# Check for carboxylation/decarboxylation
|
|
if 'co2' in product and 'co2' not in substrate:
|
|
transformations.append('carboxylation')
|
|
elif 'co2' in substrate and 'co2' not in product:
|
|
transformations.append('decarboxylation')
|
|
|
|
# Check for hydrolysis (simple heuristic)
|
|
if 'ester' in substrate.lower() and ('carboxylic_acid' in product_info.get('class', '') or 'alcohol' in product_info.get('class', '')):
|
|
transformations.append('hydrolysis')
|
|
|
|
# Check for transamination
|
|
if 'amino_acid' in product_info.get('class', '') and 'amino_acid' not in substrate_info.get('class', ''):
|
|
transformations.append('transamination')
|
|
|
|
# Default to generic transformation
|
|
if not transformations:
|
|
transformations.append('generic')
|
|
|
|
return transformations
|
|
|
|
|
|
def find_enzymes_for_transformation(substrate: str, product: str, limit: int = 10) -> List[Dict[str, Any]]:
|
|
"""Find enzymes that catalyze a specific transformation."""
|
|
validate_dependencies()
|
|
|
|
# Infer transformation types
|
|
transformations = infer_transformation_type(substrate, product)
|
|
|
|
all_enzymes = []
|
|
|
|
# Try to find enzymes by product
|
|
try:
|
|
product_enzymes = search_enzymes_by_product(product, limit=limit)
|
|
for enzyme in product_enzymes:
|
|
# Check if substrate is in the reactants
|
|
if substrate.lower() in enzyme.get('reaction', '').lower():
|
|
enzyme['transformation'] = transformations[0] if transformations else 'generic'
|
|
enzyme['substrate'] = substrate
|
|
enzyme['product'] = product
|
|
enzyme['confidence'] = 'high'
|
|
all_enzymes.append(enzyme)
|
|
time.sleep(0.5) # Rate limiting
|
|
except Exception as e:
|
|
print(f"Error searching enzymes by product: {e}")
|
|
|
|
# Try to find enzymes by substrate
|
|
try:
|
|
substrate_enzymes = search_enzymes_by_substrate(substrate, limit=limit)
|
|
for enzyme in substrate_enzymes:
|
|
# Check if product is mentioned in substrate data (limited approach)
|
|
enzyme['transformation'] = transformations[0] if transformations else 'generic'
|
|
enzyme['substrate'] = substrate
|
|
enzyme['product'] = product
|
|
enzyme['confidence'] = 'medium'
|
|
all_enzymes.append(enzyme)
|
|
time.sleep(0.5) # Rate limiting
|
|
except Exception as e:
|
|
print(f"Error searching enzymes by substrate: {e}")
|
|
|
|
# If no enzymes found, try common EC numbers for transformation types
|
|
if not all_enzymes and transformations:
|
|
for trans_type in transformations:
|
|
if trans_type in COMMON_TRANSFORMATIONS:
|
|
for ec_prefix in COMMON_TRANSFORMATIONS[trans_type]:
|
|
# This is a simplified approach - in practice you'd want
|
|
# to query the specific EC numbers with more detail
|
|
try:
|
|
generic_enzymes = search_by_pattern(trans_type, limit=5)
|
|
for enzyme in generic_enzymes:
|
|
enzyme['transformation'] = trans_type
|
|
enzyme['substrate'] = substrate
|
|
enzyme['product'] = product
|
|
enzyme['confidence'] = 'low'
|
|
all_enzymes.append(enzyme)
|
|
time.sleep(0.5)
|
|
break
|
|
except Exception as e:
|
|
print(f"Error searching for transformation type {trans_type}: {e}")
|
|
|
|
# Remove duplicates and sort by confidence
|
|
unique_enzymes = []
|
|
seen = set()
|
|
for enzyme in all_enzymes:
|
|
key = (enzyme.get('ec_number', ''), enzyme.get('organism', ''))
|
|
if key not in seen:
|
|
seen.add(key)
|
|
unique_enzymes.append(enzyme)
|
|
|
|
# Sort by confidence (high > medium > low)
|
|
confidence_order = {'high': 3, 'medium': 2, 'low': 1}
|
|
unique_enzymes.sort(key=lambda x: confidence_order.get(x.get('confidence', 'low'), 0), reverse=True)
|
|
|
|
return unique_enzymes[:limit]
|
|
|
|
|
|
def find_pathway_for_product(product: str, max_steps: int = 3, starting_materials: List[str] = None) -> Dict[str, Any]:
|
|
"""Find enzymatic pathways to synthesize a target product."""
|
|
validate_dependencies()
|
|
|
|
if starting_materials is None:
|
|
# Common starting materials
|
|
starting_materials = ['glucose', 'pyruvate', 'acetate', 'ethanol', 'glycerol']
|
|
|
|
pathway = {
|
|
'target': product,
|
|
'max_steps': max_steps,
|
|
'starting_materials': starting_materials,
|
|
'steps': [],
|
|
'alternative_pathways': [],
|
|
'warnings': [],
|
|
'confidence': 0
|
|
}
|
|
|
|
# Simple breadth-first search for pathway
|
|
from collections import deque
|
|
|
|
queue = deque([(product, 0, [product])]) # (current_metabolite, step_count, pathway)
|
|
visited = set()
|
|
|
|
while queue and len(pathway['steps']) == 0:
|
|
current_metabolite, step_count, current_path = queue.popleft()
|
|
|
|
if current_metabolite in visited or step_count >= max_steps:
|
|
continue
|
|
|
|
visited.add(current_metabolite)
|
|
|
|
# Check if current metabolite is a starting material
|
|
if current_metabolite.lower() in [sm.lower() for sm in starting_materials]:
|
|
# Found a complete pathway
|
|
pathway['steps'] = []
|
|
for i in range(len(current_path) - 1):
|
|
substrate = current_path[i + 1]
|
|
product_step = current_path[i]
|
|
enzymes = find_enzymes_for_transformation(substrate, product_step, limit=5)
|
|
|
|
if enzymes:
|
|
pathway['steps'].append({
|
|
'step_number': i + 1,
|
|
'substrate': substrate,
|
|
'product': product_step,
|
|
'enzymes': enzymes,
|
|
'transformation': infer_transformation_type(substrate, product_step)
|
|
})
|
|
else:
|
|
pathway['warnings'].append(f"No enzymes found for step: {substrate} -> {product_step}")
|
|
|
|
pathway['confidence'] = 0.8 # High confidence for found pathway
|
|
break
|
|
|
|
# Try to find enzymes that produce current metabolite
|
|
if step_count < max_steps:
|
|
# Generate possible substrates (simplified - in practice you'd need metabolic knowledge)
|
|
possible_substrates = []
|
|
|
|
# Try common metabolic precursors
|
|
common_precursors = ['glucose', 'pyruvate', 'acetate', 'ethanol', 'acetyl-CoA', 'oxaloacetate']
|
|
for precursor in common_precursors:
|
|
enzymes = find_enzymes_for_transformation(precursor, current_metabolite, limit=2)
|
|
if enzymes:
|
|
possible_substrates.append(precursor)
|
|
pathway['alternative_pathways'].append({
|
|
'precursor': precursor,
|
|
'product': current_metabolite,
|
|
'enzymes': enzymes
|
|
})
|
|
|
|
# Add found substrates to queue
|
|
for substrate in possible_substrates:
|
|
if substrate not in current_path:
|
|
new_path = [substrate] + current_path
|
|
queue.append((substrate, step_count + 1, new_path))
|
|
|
|
time.sleep(0.2) # Rate limiting
|
|
|
|
# If no complete pathway found, create partial pathway
|
|
if not pathway['steps'] and pathway['alternative_pathways']:
|
|
# Create best guess pathway from alternatives
|
|
best_alternative = max(pathway['alternative_pathways'],
|
|
key=lambda x: len(x.get('enzymes', [])))
|
|
pathway['steps'] = [{
|
|
'step_number': 1,
|
|
'substrate': best_alternative['precursor'],
|
|
'product': best_alternative['product'],
|
|
'enzymes': best_alternative['enzymes'],
|
|
'transformation': infer_transformation_type(best_alternative['precursor'], best_alternative['product'])
|
|
}]
|
|
pathway['confidence'] = 0.3 # Low confidence for partial pathway
|
|
pathway['warnings'].append("Partial pathway only - complete synthesis route not found")
|
|
|
|
elif not pathway['steps']:
|
|
pathway['warnings'].append("No enzymatic pathway found for target product")
|
|
pathway['confidence'] = 0.1
|
|
|
|
return pathway
|
|
|
|
|
|
def build_retrosynthetic_tree(target: str, depth: int = 2) -> Dict[str, Any]:
|
|
"""Build a retrosynthetic tree for a target molecule."""
|
|
validate_dependencies()
|
|
|
|
tree = {
|
|
'target': target,
|
|
'depth': depth,
|
|
'nodes': {target: {'level': 0, 'children': [], 'enzymes': []}},
|
|
'edges': [],
|
|
'alternative_routes': []
|
|
}
|
|
|
|
# Build tree recursively
|
|
def build_node_recursive(metabolite: str, current_depth: int, parent: str = None) -> None:
|
|
if current_depth >= depth:
|
|
return
|
|
|
|
# Find enzymes that can produce this metabolite
|
|
potential_precursors = ['glucose', 'pyruvate', 'acetate', 'ethanol', 'acetyl-CoA',
|
|
'oxaloacetate', 'alpha-ketoglutarate', 'malate']
|
|
|
|
for precursor in potential_precursors:
|
|
enzymes = find_enzymes_for_transformation(precursor, metabolite, limit=3)
|
|
|
|
if enzymes:
|
|
# Add precursor as node if not exists
|
|
if precursor not in tree['nodes']:
|
|
tree['nodes'][precursor] = {
|
|
'level': current_depth + 1,
|
|
'children': [],
|
|
'enzymes': enzymes
|
|
}
|
|
tree['nodes'][metabolite]['children'].append(precursor)
|
|
tree['edges'].append({
|
|
'from': precursor,
|
|
'to': metabolite,
|
|
'enzymes': enzymes,
|
|
'transformation': infer_transformation_type(precursor, metabolite)
|
|
})
|
|
|
|
# Recursively build tree
|
|
if current_depth + 1 < depth:
|
|
build_node_recursive(precursor, current_depth + 1, metabolite)
|
|
|
|
# Try common metabolic transformations
|
|
if current_depth < depth - 1:
|
|
transformations = ['oxidation', 'reduction', 'hydrolysis', 'carboxylation', 'decarboxylation']
|
|
for trans in transformations:
|
|
try:
|
|
generic_enzymes = search_by_pattern(trans, limit=2)
|
|
if generic_enzymes:
|
|
# Create hypothetical precursor
|
|
hypothetical_precursor = f"precursor_{trans}_{metabolite}"
|
|
tree['nodes'][hypothetical_precursor] = {
|
|
'level': current_depth + 1,
|
|
'children': [],
|
|
'enzymes': generic_enzymes,
|
|
'hypothetical': True
|
|
}
|
|
tree['nodes'][metabolite]['children'].append(hypothetical_precursor)
|
|
tree['edges'].append({
|
|
'from': hypothetical_precursor,
|
|
'to': metabolite,
|
|
'enzymes': generic_enzymes,
|
|
'transformation': trans,
|
|
'hypothetical': True
|
|
})
|
|
except Exception as e:
|
|
print(f"Error in retrosynthetic search for {trans}: {e}")
|
|
|
|
time.sleep(0.3) # Rate limiting
|
|
|
|
# Start building from target
|
|
build_node_recursive(target, 0)
|
|
|
|
# Calculate tree statistics
|
|
tree['total_nodes'] = len(tree['nodes'])
|
|
tree['total_edges'] = len(tree['edges'])
|
|
tree['max_depth'] = max(node['level'] for node in tree['nodes'].values()) if tree['nodes'] else 0
|
|
|
|
return tree
|
|
|
|
|
|
def suggest_enzyme_substitutions(ec_number: str, criteria: Dict[str, Any] = None) -> List[Dict[str, Any]]:
|
|
"""Suggest alternative enzymes with improved properties."""
|
|
validate_dependencies()
|
|
|
|
if criteria is None:
|
|
criteria = {
|
|
'min_temperature': 30,
|
|
'max_temperature': 70,
|
|
'min_ph': 6.0,
|
|
'max_ph': 8.0,
|
|
'min_thermostability': 40,
|
|
'prefer_organisms': ['Escherichia coli', 'Saccharomyces cerevisiae', 'Bacillus subtilis']
|
|
}
|
|
|
|
substitutions = []
|
|
|
|
# Get organisms for the target enzyme
|
|
try:
|
|
organisms = compare_across_organisms(ec_number, criteria['prefer_organisms'])
|
|
time.sleep(0.5)
|
|
except Exception as e:
|
|
print(f"Error comparing organisms: {e}")
|
|
organisms = []
|
|
|
|
# Find thermophilic homologs if temperature is a criterion
|
|
if criteria.get('min_thermostability'):
|
|
try:
|
|
thermophilic = find_thermophilic_homologs(ec_number, criteria['min_thermostability'])
|
|
time.sleep(0.5)
|
|
|
|
for enzyme in thermophilic:
|
|
enzyme['substitution_reason'] = f"Thermostable (optimal temp: {enzyme['optimal_temperature']}°C)"
|
|
enzyme['score'] = 8.0 if enzyme['optimal_temperature'] >= criteria['min_thermostability'] else 6.0
|
|
substitutions.append(enzyme)
|
|
except Exception as e:
|
|
print(f"Error finding thermophilic homologs: {e}")
|
|
|
|
# Find pH-stable variants
|
|
if criteria.get('min_ph') or criteria.get('max_ph'):
|
|
try:
|
|
ph_stable = find_ph_stable_variants(ec_number, criteria.get('min_ph'), criteria.get('max_ph'))
|
|
time.sleep(0.5)
|
|
|
|
for enzyme in ph_stable:
|
|
enzyme['substitution_reason'] = f"pH stable ({enzyme['stability_type']} range: {enzyme['ph_range']})"
|
|
enzyme['score'] = 7.5
|
|
substitutions.append(enzyme)
|
|
except Exception as e:
|
|
print(f"Error finding pH-stable variants: {e}")
|
|
|
|
# Add organism comparison results
|
|
for org_data in organisms:
|
|
if org_data.get('data_points', 0) > 0:
|
|
org_data['substitution_reason'] = f"Well-characterized in {org_data['organism']}"
|
|
org_data['score'] = 6.5 if org_data['organism'] in criteria['prefer_organisms'] else 5.0
|
|
substitutions.append(org_data)
|
|
|
|
# Sort by score
|
|
substitutions.sort(key=lambda x: x.get('score', 0), reverse=True)
|
|
|
|
return substitutions[:10] # Return top 10 suggestions
|
|
|
|
|
|
def calculate_pathway_feasibility(pathway: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Calculate feasibility scores and potential issues for a pathway."""
|
|
validate_dependencies()
|
|
|
|
feasibility = {
|
|
'overall_score': 0,
|
|
'step_scores': [],
|
|
'warnings': [],
|
|
'recommendations': [],
|
|
'thermodynamic_feasibility': 0,
|
|
'enzyme_availability': 0,
|
|
'cofactor_requirements': [],
|
|
'optimal_conditions': {}
|
|
}
|
|
|
|
if not pathway.get('steps'):
|
|
feasibility['warnings'].append("No steps in pathway")
|
|
feasibility['overall_score'] = 0.1
|
|
return feasibility
|
|
|
|
total_score = 0
|
|
step_scores = []
|
|
|
|
for step in pathway['steps']:
|
|
step_score = 0
|
|
enzymes = step.get('enzymes', [])
|
|
|
|
# Score based on number of available enzymes
|
|
if len(enzymes) >= 3:
|
|
step_score += 3 # Multiple enzyme options
|
|
elif len(enzymes) >= 1:
|
|
step_score += 2 # At least one enzyme
|
|
else:
|
|
step_score += 0 # No enzymes
|
|
feasibility['warnings'].append(f"No enzymes found for step: {step['substrate']} -> {step['product']}")
|
|
|
|
# Score based on enzyme confidence
|
|
if enzymes:
|
|
high_confidence = sum(1 for e in enzymes if e.get('confidence') == 'high')
|
|
confidence_bonus = min(high_confidence, 2) # Max 2 points for confidence
|
|
step_score += confidence_bonus
|
|
|
|
# Check for industrial viability
|
|
industrial_organisms = ['Escherichia coli', 'Saccharomyces cerevisiae', 'Bacillus subtilis']
|
|
industrial_enzymes = sum(1 for e in enzymes if e.get('organism') in industrial_organisms)
|
|
if industrial_enzymes > 0:
|
|
step_score += 1
|
|
|
|
# Cap step score at 5
|
|
step_score = min(step_score, 5)
|
|
step_scores.append(step_score)
|
|
total_score += step_score
|
|
|
|
# Analyze cofactor requirements
|
|
try:
|
|
for enzyme in enzymes:
|
|
ec_number = enzyme.get('ec_number', '')
|
|
if ec_number:
|
|
cofactors = get_cofactor_requirements(ec_number)
|
|
for cofactor in cofactors:
|
|
if cofactor['name'] not in [c['name'] for c in feasibility['cofactor_requirements']]:
|
|
feasibility['cofactor_requirements'].append(cofactor)
|
|
time.sleep(0.3)
|
|
except Exception as e:
|
|
print(f"Error analyzing cofactors: {e}")
|
|
|
|
feasibility['step_scores'] = step_scores
|
|
feasibility['enzyme_availability'] = total_score / (len(step_scores) * 5) # Normalize to 0-1
|
|
feasibility['overall_score'] = feasibility['enzyme_availability'] * 0.7 # Weight enzyme availability
|
|
|
|
# Thermodynamic feasibility (simplified heuristic)
|
|
pathway_length = len(pathway['steps'])
|
|
if pathway_length <= 2:
|
|
feasibility['thermodynamic_feasibility'] = 0.8 # Short pathways are often feasible
|
|
elif pathway_length <= 4:
|
|
feasibility['thermodynamic_feasibility'] = 0.6
|
|
else:
|
|
feasibility['thermodynamic_feasibility'] = 0.4 # Long pathways may have thermodynamic issues
|
|
|
|
# Overall feasibility is weighted combination
|
|
feasibility['overall_score'] = (
|
|
feasibility['enzyme_availability'] * 0.6 +
|
|
feasibility['thermodynamic_feasibility'] * 0.4
|
|
)
|
|
|
|
# Generate recommendations
|
|
if feasibility['overall_score'] < 0.3:
|
|
feasibility['warnings'].append("Low overall pathway feasibility")
|
|
feasibility['recommendations'].append("Consider alternative starting materials or target molecules")
|
|
elif feasibility['overall_score'] < 0.6:
|
|
feasibility['warnings'].append("Moderate pathway feasibility")
|
|
feasibility['recommendations'].append("Consider enzyme engineering or cofactor recycling")
|
|
|
|
if feasibility['cofactor_requirements']:
|
|
feasibility['recommendations'].append("Implement cofactor recycling system for: " +
|
|
", ".join([c['name'] for c in feasibility['cofactor_requirements']]))
|
|
|
|
return feasibility
|
|
|
|
|
|
def optimize_pathway_conditions(pathway: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Suggest optimal conditions for the entire pathway."""
|
|
validate_dependencies()
|
|
|
|
optimization = {
|
|
'optimal_temperature': 30.0, # Default
|
|
'optimal_ph': 7.0, # Default
|
|
'temperature_range': (20, 40), # Default
|
|
'ph_range': (6.5, 7.5), # Default
|
|
'cofactor_system': [],
|
|
'organism_compatibility': {},
|
|
'process_recommendations': []
|
|
}
|
|
|
|
temperatures = []
|
|
phs = []
|
|
organism_preferences = {}
|
|
|
|
# Collect environmental data from all enzymes
|
|
for step in pathway.get('steps', []):
|
|
for enzyme in step.get('enzymes', []):
|
|
ec_number = enzyme.get('ec_number', '')
|
|
organism = enzyme.get('organism', '')
|
|
|
|
if ec_number:
|
|
try:
|
|
env_params = get_environmental_parameters(ec_number)
|
|
time.sleep(0.3)
|
|
|
|
if env_params.get('optimal_temperature'):
|
|
temperatures.append(env_params['optimal_temperature'])
|
|
if env_params.get('optimal_ph'):
|
|
phs.append(env_params['optimal_ph'])
|
|
|
|
# Track organism preferences
|
|
if organism not in organism_preferences:
|
|
organism_preferences[organism] = {
|
|
'temperature_optima': [],
|
|
'ph_optima': [],
|
|
'step_count': 0
|
|
}
|
|
|
|
organism_preferences[organism]['step_count'] += 1
|
|
if env_params.get('optimal_temperature'):
|
|
organism_preferences[organism]['temperature_optima'].append(env_params['optimal_temperature'])
|
|
if env_params.get('optimal_ph'):
|
|
organism_preferences[organism]['ph_optima'].append(env_params['optimal_ph'])
|
|
|
|
except Exception as e:
|
|
print(f"Error getting environmental parameters for {ec_number}: {e}")
|
|
|
|
# Calculate optimal conditions
|
|
if temperatures:
|
|
optimization['optimal_temperature'] = sum(temperatures) / len(temperatures)
|
|
optimization['temperature_range'] = (min(temperatures) - 5, max(temperatures) + 5)
|
|
|
|
if phs:
|
|
optimization['optimal_ph'] = sum(phs) / len(phs)
|
|
optimization['ph_range'] = (min(phs) - 0.5, max(phs) + 0.5)
|
|
|
|
# Find best organism compatibility
|
|
for organism, data in organism_preferences.items():
|
|
if data['temperature_optima'] and data['ph_optima']:
|
|
organism_preferences[organism]['avg_temp'] = sum(data['temperature_optima']) / len(data['temperature_optima'])
|
|
organism_preferences[organism]['avg_ph'] = sum(data['ph_optima']) / len(data['ph_optima'])
|
|
organism_preferences[organism]['compatibility_score'] = data['step_count']
|
|
|
|
# Sort organisms by compatibility
|
|
compatible_organisms = sorted(
|
|
[(org, data) for org, data in organism_preferences.items() if data.get('compatibility_score', 0) > 0],
|
|
key=lambda x: x[1]['compatibility_score'],
|
|
reverse=True
|
|
)
|
|
|
|
optimization['organism_compatibility'] = dict(compatible_organisms[:5]) # Top 5 organisms
|
|
|
|
# Generate process recommendations
|
|
if len(optimization['organism_compatibility']) > 1:
|
|
optimization['process_recommendations'].append("Consider multi-organism system or enzyme cocktails")
|
|
|
|
if optimization['temperature_range'][1] - optimization['temperature_range'][0] > 30:
|
|
optimization['process_recommendations'].append("Consider temperature gradient or staged process")
|
|
|
|
if optimization['ph_range'][1] - optimization['ph_range'][0] > 2:
|
|
optimization['process_recommendations'].append("Consider pH control system or buffer optimization")
|
|
|
|
# Cofactor system optimization
|
|
cofactor_types = {}
|
|
for step in pathway.get('steps', []):
|
|
for enzyme in step.get('enzymes', []):
|
|
ec_number = enzyme.get('ec_number', '')
|
|
if ec_number:
|
|
try:
|
|
cofactors = get_cofactor_requirements(ec_number)
|
|
for cofactor in cofactors:
|
|
cofactor_type = cofactor.get('type', 'other')
|
|
if cofactor_type not in cofactor_types:
|
|
cofactor_types[cofactor_type] = []
|
|
if cofactor['name'] not in cofactor_types[cofactor_type]:
|
|
cofactor_types[cofactor_type].append(cofactor['name'])
|
|
time.sleep(0.3)
|
|
except Exception as e:
|
|
print(f"Error getting cofactors for {ec_number}: {e}")
|
|
|
|
optimization['cofactor_system'] = cofactor_types
|
|
|
|
return optimization
|
|
|
|
|
|
def generate_pathway_report(pathway: Dict[str, Any], filename: str = None) -> str:
|
|
"""Generate a comprehensive pathway report."""
|
|
validate_dependencies()
|
|
|
|
if filename is None:
|
|
target_name = pathway.get('target', 'pathway').replace(' ', '_').lower()
|
|
filename = f"pathway_report_{target_name}.txt"
|
|
|
|
# Calculate feasibility and optimization
|
|
feasibility = calculate_pathway_feasibility(pathway)
|
|
optimization = optimize_pathway_conditions(pathway)
|
|
|
|
report = []
|
|
report.append("=" * 80)
|
|
report.append(f"ENZYMATIC PATHWAY REPORT")
|
|
report.append("=" * 80)
|
|
|
|
# Overview
|
|
report.append(f"\nTARGET PRODUCT: {pathway.get('target', 'Unknown')}")
|
|
report.append(f"PATHWAY LENGTH: {len(pathway.get('steps', []))} steps")
|
|
report.append(f"OVERALL FEASIBILITY: {feasibility['overall_score']:.2f}/1.00")
|
|
|
|
# Pathway steps
|
|
if pathway.get('steps'):
|
|
report.append("\n" + "=" * 40)
|
|
report.append("PATHWAY STEPS")
|
|
report.append("=" * 40)
|
|
|
|
for i, step in enumerate(pathway['steps'], 1):
|
|
report.append(f"\nStep {i}: {step['substrate']} -> {step['product']}")
|
|
report.append(f"Transformation: {', '.join(step.get('transformation', ['Unknown']))}")
|
|
|
|
if step.get('enzymes'):
|
|
report.append(f"Available enzymes: {len(step['enzymes'])}")
|
|
for j, enzyme in enumerate(step['enzymes'][:3], 1): # Top 3 enzymes
|
|
report.append(f" {j}. EC {enzyme.get('ec_number', 'Unknown')} - {enzyme.get('organism', 'Unknown')}")
|
|
report.append(f" Confidence: {enzyme.get('confidence', 'Unknown')}")
|
|
if enzyme.get('reaction'):
|
|
report.append(f" Reaction: {enzyme['reaction'][:100]}...")
|
|
|
|
if len(step['enzymes']) > 3:
|
|
report.append(f" ... and {len(step['enzymes']) - 3} additional enzymes")
|
|
else:
|
|
report.append(" No enzymes found for this step")
|
|
|
|
if feasibility.get('step_scores') and i-1 < len(feasibility['step_scores']):
|
|
report.append(f"Step feasibility score: {feasibility['step_scores'][i-1]}/5.0")
|
|
|
|
# Cofactor requirements
|
|
if feasibility.get('cofactor_requirements'):
|
|
report.append("\n" + "=" * 40)
|
|
report.append("COFACTOR REQUIREMENTS")
|
|
report.append("=" * 40)
|
|
|
|
for cofactor in feasibility['cofactor_requirements']:
|
|
report.append(f"- {cofactor['name']} ({cofactor.get('type', 'Unknown')})")
|
|
report.append(f" Organism: {cofactor.get('organism', 'Unknown')}")
|
|
report.append(f" EC Number: {cofactor.get('ec_number', 'Unknown')}")
|
|
|
|
# Optimal conditions
|
|
report.append("\n" + "=" * 40)
|
|
report.append("OPTIMAL CONDITIONS")
|
|
report.append("=" * 40)
|
|
|
|
report.append(f"Temperature: {optimization['optimal_temperature']:.1f}°C")
|
|
report.append(f"pH: {optimization['optimal_ph']:.1f}")
|
|
report.append(f"Temperature range: {optimization['temperature_range'][0]:.1f} - {optimization['temperature_range'][1]:.1f}°C")
|
|
report.append(f"pH range: {optimization['ph_range'][0]:.1f} - {optimization['ph_range'][1]:.1f}")
|
|
|
|
if optimization.get('organism_compatibility'):
|
|
report.append("\nCompatible organisms (by preference):")
|
|
for organism, data in list(optimization['organism_compatibility'].items())[:3]:
|
|
report.append(f"- {organism} (compatibility score: {data.get('compatibility_score', 0)})")
|
|
if data.get('avg_temp'):
|
|
report.append(f" Optimal temperature: {data['avg_temp']:.1f}°C")
|
|
if data.get('avg_ph'):
|
|
report.append(f" Optimal pH: {data['avg_ph']:.1f}")
|
|
|
|
# Warnings and recommendations
|
|
if feasibility.get('warnings'):
|
|
report.append("\n" + "=" * 40)
|
|
report.append("WARNINGS")
|
|
report.append("=" * 40)
|
|
|
|
for warning in feasibility['warnings']:
|
|
report.append(f"⚠️ {warning}")
|
|
|
|
if feasibility.get('recommendations'):
|
|
report.append("\n" + "=" * 40)
|
|
report.append("RECOMMENDATIONS")
|
|
report.append("=" * 40)
|
|
|
|
for rec in feasibility['recommendations']:
|
|
report.append(f"💡 {rec}")
|
|
|
|
if optimization.get('process_recommendations'):
|
|
for rec in optimization['process_recommendations']:
|
|
report.append(f"🔧 {rec}")
|
|
|
|
# Alternative pathways
|
|
if pathway.get('alternative_pathways'):
|
|
report.append("\n" + "=" * 40)
|
|
report.append("ALTERNATIVE ROUTES")
|
|
report.append("=" * 40)
|
|
|
|
for alt in pathway['alternative_pathways'][:5]: # Top 5 alternatives
|
|
report.append(f"\n{alt['precursor']} -> {alt['product']}")
|
|
report.append(f"Enzymes available: {len(alt.get('enzymes', []))}")
|
|
for enzyme in alt.get('enzymes', [])[:2]: # Top 2 enzymes
|
|
report.append(f" - {enzyme.get('ec_number', 'Unknown')} ({enzyme.get('organism', 'Unknown')})")
|
|
|
|
# Feasibility analysis
|
|
report.append("\n" + "=" * 40)
|
|
report.append("FEASIBILITY ANALYSIS")
|
|
report.append("=" * 40)
|
|
|
|
report.append(f"Enzyme availability score: {feasibility['enzyme_availability']:.2f}/1.00")
|
|
report.append(f"Thermodynamic feasibility: {feasibility['thermodynamic_feasibility']:.2f}/1.00")
|
|
|
|
# Write report to file
|
|
with open(filename, 'w') as f:
|
|
f.write('\n'.join(report))
|
|
|
|
print(f"Pathway report saved to {filename}")
|
|
return filename
|
|
|
|
|
|
def visualize_pathway(pathway: Dict[str, Any], save_path: str = None) -> str:
|
|
"""Create a visual representation of the pathway."""
|
|
validate_dependencies()
|
|
|
|
if not NETWORKX_AVAILABLE or not MATPLOTLIB_AVAILABLE:
|
|
print("networkx and matplotlib required for pathway visualization")
|
|
return save_path or "pathway_visualization.png"
|
|
|
|
try:
|
|
# Create directed graph
|
|
G = nx.DiGraph()
|
|
|
|
# Add nodes and edges
|
|
for step in pathway.get('steps', []):
|
|
substrate = step['substrate']
|
|
product = step['product']
|
|
enzymes = step.get('enzymes', [])
|
|
|
|
G.add_node(substrate, type='substrate')
|
|
G.add_node(product, type='product')
|
|
|
|
# Add edge with enzyme information
|
|
edge_label = f"{len(enzymes)} enzymes"
|
|
if enzymes:
|
|
primary_ec = enzymes[0].get('ec_number', 'Unknown')
|
|
edge_label += f"\nEC {primary_ec}"
|
|
|
|
G.add_edge(substrate, product, label=edge_label)
|
|
|
|
# Create figure
|
|
plt.figure(figsize=(12, 8))
|
|
|
|
# Layout
|
|
pos = nx.spring_layout(G, k=2, iterations=50)
|
|
|
|
# Draw nodes
|
|
substrate_nodes = [n for n, d in G.nodes(data=True) if d.get('type') == 'substrate']
|
|
product_nodes = [n for n, d in G.nodes(data=True) if d.get('type') == 'product']
|
|
intermediate_nodes = [n for n in G.nodes() if n not in substrate_nodes and n not in product_nodes]
|
|
|
|
nx.draw_networkx_nodes(G, pos, nodelist=substrate_nodes, node_color='lightblue', node_size=1500)
|
|
nx.draw_networkx_nodes(G, pos, nodelist=product_nodes, node_color='lightgreen', node_size=1500)
|
|
nx.draw_networkx_nodes(G, pos, nodelist=intermediate_nodes, node_color='lightyellow', node_size=1200)
|
|
|
|
# Draw edges
|
|
nx.draw_networkx_edges(G, pos, edge_color='gray', arrows=True, arrowsize=20)
|
|
|
|
# Draw labels
|
|
nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold')
|
|
|
|
# Draw edge labels
|
|
edge_labels = nx.get_edge_attributes(G, 'label')
|
|
nx.draw_networkx_edge_labels(G, pos, edge_labels, font_size=8)
|
|
|
|
# Add title
|
|
plt.title(f"Enzymatic Pathway to {pathway.get('target', 'Target')}", fontsize=14, fontweight='bold')
|
|
|
|
# Add legend
|
|
plt.scatter([], [], c='lightblue', s=150, label='Starting Materials')
|
|
plt.scatter([], [], c='lightyellow', s=120, label='Intermediates')
|
|
plt.scatter([], [], c='lightgreen', s=150, label='Products')
|
|
plt.legend()
|
|
|
|
plt.axis('off')
|
|
plt.tight_layout()
|
|
|
|
# Save or show
|
|
if save_path:
|
|
plt.savefig(save_path, dpi=300, bbox_inches='tight')
|
|
print(f"Pathway visualization saved to {save_path}")
|
|
else:
|
|
plt.show()
|
|
|
|
plt.close()
|
|
return save_path or "pathway_visualization.png"
|
|
|
|
except Exception as e:
|
|
print(f"Error visualizing pathway: {e}")
|
|
return save_path or "pathway_visualization.png"
|
|
|
|
|
|
if __name__ == "__main__":
|
|
# Example usage
|
|
print("Enzyme Pathway Builder Examples")
|
|
print("=" * 50)
|
|
|
|
try:
|
|
# Example 1: Find pathway for lactate
|
|
print("\n1. Finding pathway for lactate production:")
|
|
pathway = find_pathway_for_product("lactate", max_steps=3)
|
|
print(f"Found pathway with {len(pathway['steps'])} steps")
|
|
print(f"Feasibility: {pathway['confidence']:.2f}")
|
|
|
|
# Example 2: Build retrosynthetic tree
|
|
print("\n2. Building retrosynthetic tree for ethanol:")
|
|
tree = build_retrosynthetic_tree("ethanol", depth=2)
|
|
print(f"Tree has {tree['total_nodes']} nodes and {tree['total_edges']} edges")
|
|
|
|
# Example 3: Suggest enzyme substitutions
|
|
print("\n3. Suggesting enzyme substitutions for alcohol dehydrogenase:")
|
|
substitutions = suggest_enzyme_substitutions("1.1.1.1")
|
|
for sub in substitutions[:3]:
|
|
print(f" - {sub.get('organism', 'Unknown')}: {sub.get('substitution_reason', 'No reason')}")
|
|
|
|
# Example 4: Calculate feasibility
|
|
print("\n4. Calculating pathway feasibility:")
|
|
feasibility = calculate_pathway_feasibility(pathway)
|
|
print(f"Overall score: {feasibility['overall_score']:.2f}")
|
|
print(f"Warnings: {len(feasibility['warnings'])}")
|
|
|
|
# Example 5: Generate pathway report
|
|
print("\n5. Generating pathway report:")
|
|
report_file = generate_pathway_report(pathway)
|
|
print(f"Report saved to: {report_file}")
|
|
|
|
# Example 6: Visualize pathway
|
|
print("\n6. Visualizing pathway:")
|
|
viz_file = visualize_pathway(pathway, "example_pathway.png")
|
|
print(f"Visualization saved to: {viz_file}")
|
|
|
|
except Exception as e:
|
|
print(f"Example failed: {e}") |