refactor(rdkit): update fingerprint generation to use rdFingerprintGenerator API

- Replace direct calls to AllChem, Chem.RDKFingerprint, Pairs, and Torsions with rdFingerprintGenerator in similarity_search.py
- Update example code in SKILL.md to reflect the new API usage
- Maintain existing functionality while adopting the modern fingerprint generation interface recommended by RDKit
This commit is contained in:
jiaodu1307
2026-01-26 20:25:28 +08:00
parent a31cf4dd97
commit 06ac0af626
2 changed files with 31 additions and 18 deletions

View File

@@ -231,25 +231,30 @@ is_drug_like = mw and logp and hbd and hba
**Fingerprint Types:** **Fingerprint Types:**
```python ```python
from rdkit.Chem import AllChem, RDKFingerprint from rdkit.Chem import rdFingerprintGenerator
from rdkit.Chem.AtomPairs import Pairs, Torsions
from rdkit.Chem import MACCSkeys from rdkit.Chem import MACCSkeys
# RDKit topological fingerprint # RDKit topological fingerprint
fp = Chem.RDKFingerprint(mol) rdk_gen = rdFingerprintGenerator.GetRDKitFPGenerator(minPath=1, maxPath=7, fpSize=2048)
fp = rdk_gen.GetFingerprint(mol)
# Morgan fingerprints (circular fingerprints, similar to ECFP) # Morgan fingerprints (circular fingerprints, similar to ECFP)
fp = AllChem.GetMorganFingerprint(mol, radius=2) # Modern API using rdFingerprintGenerator
fp_bits = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048) morgan_gen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
fp = morgan_gen.GetFingerprint(mol)
# Count-based fingerprint
fp_count = morgan_gen.GetCountFingerprint(mol)
# MACCS keys (166-bit structural key) # MACCS keys (166-bit structural key)
fp = MACCSkeys.GenMACCSKeys(mol) fp = MACCSkeys.GenMACCSKeys(mol)
# Atom pair fingerprints # Atom pair fingerprints
fp = Pairs.GetAtomPairFingerprint(mol) ap_gen = rdFingerprintGenerator.GetAtomPairGenerator()
fp = ap_gen.GetFingerprint(mol)
# Topological torsion fingerprints # Topological torsion fingerprints
fp = Torsions.GetTopologicalTorsionFingerprint(mol) tt_gen = rdFingerprintGenerator.GetTopologicalTorsionGenerator()
fp = tt_gen.GetFingerprint(mol)
# Avalon fingerprints (if available) # Avalon fingerprints (if available)
from rdkit.Avalon import pyAvalonTools from rdkit.Avalon import pyAvalonTools
@@ -260,14 +265,19 @@ fp = pyAvalonTools.GetAvalonFP(mol)
```python ```python
from rdkit import DataStructs from rdkit import DataStructs
from rdkit.Chem import rdFingerprintGenerator
# Generate fingerprints with a single, reusable Morgan generator
mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
fp1 = mfpgen.GetFingerprint(mol1)
fp2 = mfpgen.GetFingerprint(mol2)
# Calculate Tanimoto similarity # Calculate Tanimoto similarity
fp1 = AllChem.GetMorganFingerprintAsBitVect(mol1, radius=2)
fp2 = AllChem.GetMorganFingerprintAsBitVect(mol2, radius=2)
similarity = DataStructs.TanimotoSimilarity(fp1, fp2) similarity = DataStructs.TanimotoSimilarity(fp1, fp2)
# Calculate similarity for multiple molecules # Calculate similarity for multiple molecules
similarities = DataStructs.BulkTanimotoSimilarity(fp1, [fp2, fp3, fp4]) fps = [mfpgen.GetFingerprint(m) for m in [mol2, mol3, mol4]]
similarities = DataStructs.BulkTanimotoSimilarity(fp1, fps)
# Other similarity metrics # Other similarity metrics
dice = DataStructs.DiceSimilarity(fp1, fp2) dice = DataStructs.DiceSimilarity(fp1, fp2)
@@ -282,7 +292,8 @@ from rdkit.ML.Cluster import Butina
# Calculate distance matrix # Calculate distance matrix
dists = [] dists = []
fps = [AllChem.GetMorganFingerprintAsBitVect(mol, 2) for mol in mols] mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
fps = [mfpgen.GetFingerprint(mol) for mol in mols]
for i in range(len(fps)): for i in range(len(fps)):
sims = DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i]) sims = DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i])
dists.extend([1-sim for sim in sims]) dists.extend([1-sim for sim in sims])

View File

@@ -16,7 +16,7 @@ from pathlib import Path
try: try:
from rdkit import Chem from rdkit import Chem
from rdkit.Chem import AllChem, MACCSkeys from rdkit.Chem import AllChem, MACCSkeys, rdFingerprintGenerator
from rdkit import DataStructs from rdkit import DataStructs
except ImportError: except ImportError:
print("Error: RDKit not installed. Install with: conda install -c conda-forge rdkit") print("Error: RDKit not installed. Install with: conda install -c conda-forge rdkit")
@@ -40,17 +40,19 @@ def generate_fingerprint(mol, method='morgan', radius=2, n_bits=2048):
method = method.lower() method = method.lower()
if method == 'morgan': if method == 'morgan':
return AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits) gen = rdFingerprintGenerator.GetMorganGenerator(radius=radius, fpSize=n_bits)
return gen.GetFingerprint(mol)
elif method == 'rdkit': elif method == 'rdkit':
return Chem.RDKFingerprint(mol, maxPath=7, fpSize=n_bits) gen = rdFingerprintGenerator.GetRDKitFPGenerator(maxPath=7, fpSize=n_bits)
return gen.GetFingerprint(mol)
elif method == 'maccs': elif method == 'maccs':
return MACCSkeys.GenMACCSKeys(mol) return MACCSkeys.GenMACCSKeys(mol)
elif method == 'atompair': elif method == 'atompair':
from rdkit.Chem.AtomPairs import Pairs gen = rdFingerprintGenerator.GetAtomPairGenerator(fpSize=n_bits)
return Pairs.GetAtomPairFingerprintAsBitVect(mol, nBits=n_bits) return gen.GetFingerprint(mol)
elif method == 'torsion': elif method == 'torsion':
from rdkit.Chem.AtomPairs import Torsions gen = rdFingerprintGenerator.GetTopologicalTorsionGenerator(fpSize=n_bits)
return Torsions.GetHashedTopologicalTorsionFingerprintAsBitVect(mol, nBits=n_bits) return gen.GetFingerprint(mol)
else: else:
raise ValueError(f"Unknown fingerprint method: {method}") raise ValueError(f"Unknown fingerprint method: {method}")