mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
refactor(rdkit): update fingerprint generation to use rdFingerprintGenerator API
- Replace direct calls to AllChem, Pairs, and Torsions with rdFingerprintGenerator in similarity_search.py
- Update example code in SKILL.md to reflect the new API usage
- Maintain existing functionality while adopting the modern fingerprint generation interface recommended by RDKit
This commit is contained in:
@@ -231,25 +231,30 @@ is_drug_like = mw and logp and hbd and hba
|
||||
**Fingerprint Types:**
|
||||
|
||||
```python
|
||||
from rdkit.Chem import AllChem, RDKFingerprint
|
||||
from rdkit.Chem.AtomPairs import Pairs, Torsions
|
||||
from rdkit.Chem import rdFingerprintGenerator
|
||||
from rdkit.Chem import MACCSkeys
|
||||
|
||||
# RDKit topological fingerprint
|
||||
fp = Chem.RDKFingerprint(mol)
|
||||
rdk_gen = rdFingerprintGenerator.GetRDKitFPGenerator(minPath=1, maxPath=7, fpSize=2048)
|
||||
fp = rdk_gen.GetFingerprint(mol)
|
||||
|
||||
# Morgan fingerprints (circular fingerprints, similar to ECFP)
|
||||
fp = AllChem.GetMorganFingerprint(mol, radius=2)
|
||||
fp_bits = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
|
||||
# Modern API using rdFingerprintGenerator
|
||||
morgan_gen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
|
||||
fp = morgan_gen.GetFingerprint(mol)
|
||||
# Count-based fingerprint
|
||||
fp_count = morgan_gen.GetCountFingerprint(mol)
|
||||
|
||||
# MACCS keys (166-bit structural key)
|
||||
fp = MACCSkeys.GenMACCSKeys(mol)
|
||||
|
||||
# Atom pair fingerprints
|
||||
fp = Pairs.GetAtomPairFingerprint(mol)
|
||||
ap_gen = rdFingerprintGenerator.GetAtomPairGenerator()
|
||||
fp = ap_gen.GetFingerprint(mol)
|
||||
|
||||
# Topological torsion fingerprints
|
||||
fp = Torsions.GetTopologicalTorsionFingerprint(mol)
|
||||
tt_gen = rdFingerprintGenerator.GetTopologicalTorsionGenerator()
|
||||
fp = tt_gen.GetFingerprint(mol)
|
||||
|
||||
# Avalon fingerprints (if available)
|
||||
from rdkit.Avalon import pyAvalonTools
|
||||
@@ -260,14 +265,19 @@ fp = pyAvalonTools.GetAvalonFP(mol)
|
||||
|
||||
```python
|
||||
from rdkit import DataStructs
|
||||
from rdkit.Chem import rdFingerprintGenerator
|
||||
|
||||
# Generate fingerprints using generator
|
||||
mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
|
||||
fp1 = mfpgen.GetFingerprint(mol1)
|
||||
fp2 = mfpgen.GetFingerprint(mol2)
|
||||
|
||||
# Calculate Tanimoto similarity
|
||||
fp1 = AllChem.GetMorganFingerprintAsBitVect(mol1, radius=2)
|
||||
fp2 = AllChem.GetMorganFingerprintAsBitVect(mol2, radius=2)
|
||||
similarity = DataStructs.TanimotoSimilarity(fp1, fp2)
|
||||
|
||||
# Calculate similarity for multiple molecules
|
||||
similarities = DataStructs.BulkTanimotoSimilarity(fp1, [fp2, fp3, fp4])
|
||||
fps = [mfpgen.GetFingerprint(m) for m in [mol2, mol3, mol4]]
|
||||
similarities = DataStructs.BulkTanimotoSimilarity(fp1, fps)
|
||||
|
||||
# Other similarity metrics
|
||||
dice = DataStructs.DiceSimilarity(fp1, fp2)
|
||||
@@ -282,7 +292,8 @@ from rdkit.ML.Cluster import Butina
|
||||
|
||||
# Calculate distance matrix
|
||||
dists = []
|
||||
fps = [AllChem.GetMorganFingerprintAsBitVect(mol, 2) for mol in mols]
|
||||
mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
|
||||
fps = [mfpgen.GetFingerprint(mol) for mol in mols]
|
||||
for i in range(len(fps)):
|
||||
sims = DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i])
|
||||
dists.extend([1-sim for sim in sims])
|
||||
|
||||
@@ -16,7 +16,7 @@ from pathlib import Path
|
||||
|
||||
try:
|
||||
from rdkit import Chem
|
||||
from rdkit.Chem import AllChem, MACCSkeys
|
||||
from rdkit.Chem import AllChem, MACCSkeys, rdFingerprintGenerator
|
||||
from rdkit import DataStructs
|
||||
except ImportError:
|
||||
print("Error: RDKit not installed. Install with: conda install -c conda-forge rdkit")
|
||||
@@ -40,17 +40,19 @@ def generate_fingerprint(mol, method='morgan', radius=2, n_bits=2048):
|
||||
method = method.lower()
|
||||
|
||||
if method == 'morgan':
|
||||
return AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
|
||||
gen = rdFingerprintGenerator.GetMorganGenerator(radius=radius, fpSize=n_bits)
|
||||
return gen.GetFingerprint(mol)
|
||||
elif method == 'rdkit':
|
||||
return Chem.RDKFingerprint(mol, maxPath=7, fpSize=n_bits)
|
||||
gen = rdFingerprintGenerator.GetRDKitFPGenerator(maxPath=7, fpSize=n_bits)
|
||||
return gen.GetFingerprint(mol)
|
||||
elif method == 'maccs':
|
||||
return MACCSkeys.GenMACCSKeys(mol)
|
||||
elif method == 'atompair':
|
||||
from rdkit.Chem.AtomPairs import Pairs
|
||||
return Pairs.GetAtomPairFingerprintAsBitVect(mol, nBits=n_bits)
|
||||
gen = rdFingerprintGenerator.GetAtomPairGenerator(fpSize=n_bits)
|
||||
return gen.GetFingerprint(mol)
|
||||
elif method == 'torsion':
|
||||
from rdkit.Chem.AtomPairs import Torsions
|
||||
return Torsions.GetHashedTopologicalTorsionFingerprintAsBitVect(mol, nBits=n_bits)
|
||||
gen = rdFingerprintGenerator.GetTopologicalTorsionGenerator(fpSize=n_bits)
|
||||
return gen.GetFingerprint(mol)
|
||||
else:
|
||||
raise ValueError(f"Unknown fingerprint method: {method}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user