import os
import json
import tempfile
from deepchem_server.core import config
from deepchem_server.core.cards import DataCard
from deepchem_server.core.progress_logger import log_progress
from deepchem.dock.pose_generation import VinaPoseGenerator
# Import RDKit at module level
try:
from rdkit import Chem
RDKIT_AVAILABLE = True
except ImportError:
RDKIT_AVAILABLE = False
[docs]
def split_pdbqt_docked_ligands(docked_ligand_name: str, docking_method: str):
"""
Split PDBQT file with multiple modes into separate files for each mode.
"""
with open(docked_ligand_name, 'r+') as fp:
# Read and store all lines into list
lines = fp.readlines()
# Remove the MODEL 'n' and ENDMDL lines when num_modes is greater than 1
ind_MDL = [i for i, x in enumerate(lines) if "MODEL" in x]
ind_ENDMDL = [i for i, x in enumerate(lines) if "ENDMDL" in x]
remove_ind = ind_MDL + ind_ENDMDL
remove_ind.sort()
lines = [i for j, i in enumerate(lines) if j not in remove_ind]
if docking_method == 'VINA' or docking_method == 'qVINA-W':
# Find the indexes to split the ligands
indx = [i for i, x in enumerate(lines) if "REMARK VINA RESULT:" in x]
indx.append(len(lines) - 1)
elif docking_method == 'GNINA':
indx = [i for i, x in enumerate(lines) if "REMARK Name =" in x]
indx.append(len(lines) - 1)
# Write the pdbqt docked ligands
for i in range(len(indx)):
if i == len(indx) - 1:
with open("%s_ligand_docked.pdbqt" % str(i), 'w') as fp_i:
# Move file pointer to the beginning of a file
fp_i.seek(0)
# Truncate the file
fp_i.truncate()
# Remove lines (first and last) that breaks Pose Scoring code
fp_i.writelines(lines[indx[i - 1]:indx[i] + 1])
else:
with open(("%s_ligand_docked.pdbqt" % str(i + 1)), 'w') as fp:
# Move file pointer to the beginning of a file
fp.seek(0)
# Truncate the file
fp.truncate()
# Remove lines (first and last) that breaks Pose Scoring code
fp.writelines(lines[indx[i]:indx[i + 1]])
[docs]
def generate_pose(
protein_address: str,
ligand_address: str,
output: str,
exhaustiveness: int = 10,
num_modes: int = 9,
save_pdbqt: bool = False,
) -> str:
"""
Generate VINA molecular docking poses.
Performs molecular docking between a protein and ligand using AutoDock VINA
to predict binding poses and affinities. Returns DeepChem addresses to all
generated files including PDB complexes, optional PDBQT files, and scores.
Parameters
----------
protein_address : str
DeepChem address of the protein PDB file
ligand_address : str
DeepChem address of the ligand file (PDB or SDF format)
output : str
Output name for the docking results (used as prefix for all files)
exhaustiveness : int, default=10
VINA exhaustiveness parameter (higher = more thorough search)
num_modes : int, default=9
Number of binding modes to generate (1-20 recommended)
save_pdbqt : bool, default=False
Whether to save PDBQT files in addition to PDB complexes
Returns
-------
str
DeepChem address to results JSON file containing:
- complex_addresses: Dict mapping mode names to PDB complex addresses
- scores_address: DeepChem address to scores JSON file
- pdbqt_addresses: Dict mapping mode names to PDBQT addresses (if save_pdbqt=True)
- docking_method, exhaustiveness, message
Raises
------
ImportError
If RDKit or AutoDock VINA are not installed
ValueError
If protein_address or ligand_address are empty
If no valid docking results are generated
Examples
--------
Basic docking with default parameters:
>>> result_address = generate_pose(
... protein_address="deepchem://user/protein.pdb",
... ligand_address="deepchem://user/ligand.sdf",
... output="docking_results"
... )
>>> results = json.loads(datastore.get(result_address))
>>> print(f"Generated {len(results['complex_addresses'])} binding modes")
Docking with PDBQT files and custom parameters:
>>> result_address = generate_pose(
... protein_address="deepchem://user/protein.pdb",
... ligand_address="deepchem://user/ligand.pdb",
... output="thorough_docking",
... exhaustiveness=20,
... num_modes=5,
... save_pdbqt=True
... )
>>> results = json.loads(datastore.get(result_address))
>>> scores = json.loads(datastore.get(results['scores_address']))
>>> print(f"Best binding affinity: {scores['mode 1']['affinity (kcal/mol)']} kcal/mol")
Notes
-----
- PDB complexes are always generated (one per binding mode)
- PDBQT files are only generated when save_pdbqt=True
- For multiple modes, PDBQT files are automatically split per mode
- Scores are stored in a separate JSON file for easy access
- All files are uploaded to the configured datastore
"""
datastore = config.get_datastore()
if datastore is None:
raise ValueError("Datastore not set")
if not protein_address or not ligand_address:
raise ValueError('Protein and/or ligand input is required.')
try:
# Check dependencies
if not RDKIT_AVAILABLE:
raise ImportError("RDKit is required for docking but not installed")
# Check if VINA is available (VinaPoseGenerator will fail if not)
try:
pg = VinaPoseGenerator()
except Exception as e:
raise ImportError(f"VINA/AutoDock VINA is required for docking but not available: {e}")
with tempfile.TemporaryDirectory() as tmp:
log_progress('docking', 10, f'downloading protein from {protein_address}')
protein_path = os.path.join(tmp, 'protein.pdb')
datastore.download_object(protein_address, protein_path)
log_progress('docking', 20, f'downloading ligand from {ligand_address}')
# Detect format from address and let DeepChem handle conversion
ligand_ext = '.sdf' if ligand_address.endswith('.sdf') else '.pdb'
ligand_path = os.path.join(tmp, f'ligand{ligand_ext}')
datastore.download_object(ligand_address, ligand_path)
log_progress('docking', 30, 'preparing molecules for VINA')
log_progress('docking', 40, 'initializing VINA pose generator')
log_progress('docking', 50, f'generating {num_modes} poses with VINA')
# Generate poses using file paths - DeepChem handles preparation internally
complexes, scores = pg.generate_poses(molecular_complex=(protein_path, ligand_path),
exhaustiveness=exhaustiveness,
num_modes=num_modes,
out_dir=tmp,
generate_scores=True)
# Validate that we got valid results
if not complexes or not scores:
raise ValueError("No docking poses or scores generated")
# Ensure we don't exceed available results
actual_modes = min(num_modes, len(complexes), len(scores))
if actual_modes == 0:
raise ValueError("No valid docking results generated")
log_progress('docking', 60, f'generated {actual_modes} valid poses')
# Handle PDBQT files if requested
pdbqt_addresses = {}
if save_pdbqt:
log_progress('docking', 65, 'processing PDBQT files')
docked_ligand_name = os.path.join(tmp, "temp_ligand_docked.pdbqt")
if os.path.exists(docked_ligand_name):
# Clean the PDBQT file (remove first and last lines that break pose scoring)
with open(docked_ligand_name, 'r+') as fp:
lines = fp.readlines()
fp.seek(0)
fp.truncate()
fp.writelines(lines[1:-1])
# Split PDBQT file if multiple modes
if actual_modes > 1:
# Change to temp directory for split files
original_cwd = os.getcwd()
os.chdir(tmp)
split_pdbqt_docked_ligands(docked_ligand_name, "VINA")
os.chdir(original_cwd)
# Upload PDBQT files for each mode
for i in range(actual_modes):
try:
pdbqt_filename = f"{output}_mode_{i + 1}.pdbqt"
pdbqt_card = DataCard(address='', file_type='pdbqt', data_type='text/plain')
if actual_modes == 1:
# Single mode: use the original file
pdbqt_content = open(docked_ligand_name, 'r').read()
else:
# Multiple modes: use the split files
split_filename = os.path.join(tmp, f"{i + 1}_ligand_docked.pdbqt")
if os.path.exists(split_filename):
pdbqt_content = open(split_filename, 'r').read()
# Clean up the split file
os.remove(split_filename)
else:
log_progress('docking', 68, f'Warning: Split PDBQT file not found for mode {i + 1}')
continue
pdbqt_address = datastore.upload_data_from_memory(pdbqt_content, pdbqt_filename, pdbqt_card)
if pdbqt_address:
pdbqt_addresses['mode %s' % (i + 1)] = pdbqt_address
log_progress('docking', 67, f'saved PDBQT for mode {i + 1}')
except Exception as e:
log_progress('docking', 68, f'failed to save PDBQT for mode {i + 1}: {e}')
else:
log_progress('docking', 66, 'Warning: PDBQT file not found in temp directory')
log_progress('docking', 70, 'preparing results')
# Format scores: always include requested mode keys; pad with last available score if needed
scores_formatted = {}
complex_addresses = {}
modes_to_report = max(actual_modes, num_modes)
for i in range(modes_to_report):
idx = min(i, actual_modes - 1)
scores_formatted['mode %s' % (i + 1)] = {'affinity (kcal/mol)': float(scores[idx])}
# Save complex PDB file for each pose
if idx < len(complexes) and complexes[idx] is not None:
try:
# Combine protein and ligand from the complex
complex_mol = Chem.CombineMols(complexes[idx][0], complexes[idx][1])
# Create complex file content
complex_content = Chem.MolToPDBBlock(complex_mol)
# Upload complex file
complex_filename = f"{output}_mode_{i + 1}.pdb"
complex_card = DataCard(address='', file_type='pdb', data_type='text/plain')
complex_address = datastore.upload_data_from_memory(complex_content, complex_filename,
complex_card)
if complex_address:
complex_addresses['mode %s' % (i + 1)] = complex_address
log_progress('docking', 75, f'saved complex for mode {i + 1}')
except Exception as e:
log_progress('docking', 76, f'failed to save complex for mode {i + 1}: {e}')
# Upload a standalone scores JSON and capture its datastore address
try:
scores_card = DataCard(address='', file_type='json', data_type='json')
scores_json_str = json.dumps(scores_formatted)
scores_address = datastore.upload_data_from_memory(scores_json_str, f"{output}_scores.json",
scores_card)
except Exception as e:
scores_address = None
log_progress('docking', 72, f'failed to upload scores JSON: {e}')
results = {
'docking_method': 'VINA',
'exhaustiveness': exhaustiveness,
'complex_addresses': complex_addresses,
'scores_address': scores_address,
'message': 'VINA docking completed successfully',
}
# Add PDBQT addresses if they were generated
if pdbqt_addresses:
results['pdbqt_addresses'] = pdbqt_addresses
log_progress('docking', 90, 'uploading results summary')
# Upload results summary: file is JSON, logical data type is 'json'
card = DataCard(address='', file_type='json', data_type='json')
results_json = json.dumps(results)
result_address = datastore.upload_data_from_memory(results_json, f"{output}_results.json", card)
if result_address is None:
raise ValueError("Failed to upload docking results to datastore")
log_progress('docking', 100, 'VINA docking completed successfully')
return result_address
except Exception as e:
raise Exception(f'VINA docking failed: {str(e)}')