refactor(rdkit): update fingerprint generation to use rdFingerprintGenerator API

- Replace direct calls to AllChem, Chem.RDKFingerprint, Pairs, and Torsions with rdFingerprintGenerator in similarity_search.py
- Update example code in SKILL.md to reflect the new API usage
- Maintain existing functionality while adopting the modern fingerprint generation interface recommended by RDKit
This commit is contained in:
jiaodu1307
2026-01-26 20:25:28 +08:00
parent a31cf4dd97
commit 06ac0af626
2 changed files with 31 additions and 18 deletions

View File

@@ -231,25 +231,30 @@ is_drug_like = mw and logp and hbd and hba
**Fingerprint Types:**
```python
from rdkit.Chem import AllChem, RDKFingerprint
from rdkit.Chem.AtomPairs import Pairs, Torsions
from rdkit.Chem import rdFingerprintGenerator
from rdkit.Chem import MACCSkeys
# RDKit topological fingerprint
fp = Chem.RDKFingerprint(mol)
rdk_gen = rdFingerprintGenerator.GetRDKitFPGenerator(minPath=1, maxPath=7, fpSize=2048)
fp = rdk_gen.GetFingerprint(mol)
# Morgan fingerprints (circular fingerprints, similar to ECFP)
fp = AllChem.GetMorganFingerprint(mol, radius=2)
fp_bits = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
# Modern API using rdFingerprintGenerator
morgan_gen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
fp = morgan_gen.GetFingerprint(mol)
# Count-based fingerprint
fp_count = morgan_gen.GetCountFingerprint(mol)
# MACCS keys (166-bit structural key)
fp = MACCSkeys.GenMACCSKeys(mol)
# Atom pair fingerprints
fp = Pairs.GetAtomPairFingerprint(mol)
ap_gen = rdFingerprintGenerator.GetAtomPairGenerator()
fp = ap_gen.GetFingerprint(mol)
# Topological torsion fingerprints
fp = Torsions.GetTopologicalTorsionFingerprint(mol)
tt_gen = rdFingerprintGenerator.GetTopologicalTorsionGenerator()
fp = tt_gen.GetFingerprint(mol)
# Avalon fingerprints (if available)
from rdkit.Avalon import pyAvalonTools
@@ -260,14 +265,19 @@ fp = pyAvalonTools.GetAvalonFP(mol)
```python
from rdkit import DataStructs
from rdkit.Chem import rdFingerprintGenerator
# Generate fingerprints using generator
mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
fp1 = mfpgen.GetFingerprint(mol1)
fp2 = mfpgen.GetFingerprint(mol2)
# Calculate Tanimoto similarity
fp1 = AllChem.GetMorganFingerprintAsBitVect(mol1, radius=2)
fp2 = AllChem.GetMorganFingerprintAsBitVect(mol2, radius=2)
similarity = DataStructs.TanimotoSimilarity(fp1, fp2)
# Calculate similarity for multiple molecules
similarities = DataStructs.BulkTanimotoSimilarity(fp1, [fp2, fp3, fp4])
fps = [mfpgen.GetFingerprint(m) for m in [mol2, mol3, mol4]]
similarities = DataStructs.BulkTanimotoSimilarity(fp1, fps)
# Other similarity metrics
dice = DataStructs.DiceSimilarity(fp1, fp2)
@@ -282,7 +292,8 @@ from rdkit.ML.Cluster import Butina
# Calculate distance matrix
dists = []
fps = [AllChem.GetMorganFingerprintAsBitVect(mol, 2) for mol in mols]
mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=2048)
fps = [mfpgen.GetFingerprint(mol) for mol in mols]
for i in range(len(fps)):
sims = DataStructs.BulkTanimotoSimilarity(fps[i], fps[:i])
dists.extend([1-sim for sim in sims])

View File

@@ -16,7 +16,7 @@ from pathlib import Path
try:
from rdkit import Chem
from rdkit.Chem import AllChem, MACCSkeys
from rdkit.Chem import AllChem, MACCSkeys, rdFingerprintGenerator
from rdkit import DataStructs
except ImportError:
print("Error: RDKit not installed. Install with: conda install -c conda-forge rdkit")
@@ -40,17 +40,19 @@ def generate_fingerprint(mol, method='morgan', radius=2, n_bits=2048):
method = method.lower()
if method == 'morgan':
return AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
gen = rdFingerprintGenerator.GetMorganGenerator(radius=radius, fpSize=n_bits)
return gen.GetFingerprint(mol)
elif method == 'rdkit':
return Chem.RDKFingerprint(mol, maxPath=7, fpSize=n_bits)
gen = rdFingerprintGenerator.GetRDKitFPGenerator(maxPath=7, fpSize=n_bits)
return gen.GetFingerprint(mol)
elif method == 'maccs':
return MACCSkeys.GenMACCSKeys(mol)
elif method == 'atompair':
from rdkit.Chem.AtomPairs import Pairs
return Pairs.GetAtomPairFingerprintAsBitVect(mol, nBits=n_bits)
gen = rdFingerprintGenerator.GetAtomPairGenerator(fpSize=n_bits)
return gen.GetFingerprint(mol)
elif method == 'torsion':
from rdkit.Chem.AtomPairs import Torsions
return Torsions.GetHashedTopologicalTorsionFingerprintAsBitVect(mol, nBits=n_bits)
gen = rdFingerprintGenerator.GetTopologicalTorsionGenerator(fpSize=n_bits)
return gen.GetFingerprint(mol)
else:
raise ValueError(f"Unknown fingerprint method: {method}")