#!/usr/bin/env python import IMP import IMP.saxs import os import re import sys import math import shutil import tempfile import subprocess import optparse import inspect class TempDir(object): """Make a temporary directory, and delete it when the object is destroyed""" def __init__(self): self.tmpdir = tempfile.mkdtemp() def __del__(self): shutil.rmtree(self.tmpdir, ignore_errors=True) def _count_lines(filename): """Count the number of lines in the file""" wc = 0 for line in open(filename): wc += 1 return wc def _count_fiber_dock_out(filename): """Count the number of transformations in a FiberDock output file""" wc = 0 header_found = False for line in open(filename): if '|' in line: if header_found: wc += 1 elif 'glob' in line: header_found = True return wc def _run_binary(path, binary, args, out_file=None): if path: binary = os.path.join(path, binary) cmd = ' '.join([binary] + args) if out_file: stdout = open(out_file, 'w') cmd += ' > ' + out_file else: stdout = sys.stdout print cmd p = subprocess.Popen([binary] + args, stdout=stdout, stderr=sys.stderr) ret = p.wait() if ret != 0: raise OSError("subprocess failed with exit code %d" % ret) class Scorer(object): """Score transformations using a type of experimental data. To add support for a new type of data, simply create a new Scorer subclass and override its methods and variables.""" transformations_file = 'trans_pd' # Number of transforms to be passed to FiberDock; more accurate scoring # methods can get away with fewer transforms transforms_needed = 5000 # If set to True, higher scores are considered better when calculating # zscores reverse_zscores = False # A short string (< 8 characters) identifying the method short_name = None @classmethod def add_parser_options(cls, parser): """Add command line options to the given parser""" pass @classmethod def check_options(cls, idock, parser): """Check command line options for consistency, and create and return a new instance if selected by these options""" pass def __init__(self, idock, output_type): self.receptor = idock.receptor self.ligand = idock.ligand self.output_file = idock.get_filename("%s.res" % output_type) self.zscore_output_file = idock.get_filename("%sf.res" % output_type) def score(self, num_transforms): if os.path.exists(self.output_file) \ and _count_lines(self.output_file) >= num_transforms: print "Skipping %s for %s" % (str(self), self.receptor) else: self._run_score_binary() def recompute_zscore(self, transforms_file): """Recompute z-scores for the new set of transformations in transforms_file""" d = TempDir() tmp_tf = os.path.join(d.tmpdir, 'tmp') solutions = [] num_re = re.compile('\d') # Read all solutions for line in open(self.output_file): spl = line.split('|') if len(spl) > 1 and num_re.search(spl[0]): solutions.append(line) # Extract only the solutions that are mentioned in transforms_file fh = open(tmp_tf, 'w') for line in open(transforms_file): spl = line.split() if len(spl) > 1: fh.write(solutions[int(spl[0])-1]) fh.close() # Recompute z-scores for the new set args = [tmp_tf] if self.reverse_zscores: args.append('1') _run_binary(None, 'recompute_zscore', args, out_file=self.zscore_output_file) class NMRScorer(Scorer): """Score transformations using NMR residue type content""" short_name = 'nmr_rtc' @classmethod def add_parser_options(cls, parser): g = optparse.OptionGroup(parser, "NMR-RTC") g.add_option('--receptor_rtc', metavar='FILE', help="File name of the receptor NMR residue type content") g.add_option('--ligand_rtc', metavar='FILE', help="File name of the ligand NMR residue type content") parser.add_option_group(g) @classmethod def check_options(cls, idock, parser): if idock.opts.receptor_rtc or idock.opts.ligand_rtc: return cls(idock) def __init__(self, idock): Scorer.__init__(self, idock, "nmr_rtc_score") self.receptor_rtc = idock.opts.receptor_rtc self.ligand_rtc = idock.opts.ligand_rtc if idock.opts.type == 'AA': self.receptor_rtc, self.ligand_rtc = self.ligand_rtc, '-' def __str__(self): return "NMR score" def _run_score_binary(self): _run_binary(None, "nmr_rtc_score", [self.receptor, self.ligand, self.transformations_file, self.receptor_rtc, self.ligand_rtc, '-o', self.output_file]) class SAXSScorer(Scorer): """Score transformations using SAXS (rg + chi)""" short_name = 'saxs' @classmethod def add_parser_options(cls, parser): g = optparse.OptionGroup(parser, "SAXS") g.add_option('--saxs', metavar='FILE', dest='saxs_file', help="File name of the complex SAXS profile") g.add_option('--saxs_receptor_pdb', metavar='FILE', help='Additional receptor structure for SAXS scoring ' 'with modeled missing atoms/residues. ' 'This structure should be aligned to the ' 'input receptor!') g.add_option('--saxs_ligand_pdb', metavar='FILE', help='Additional ligand structure for SAXS scoring ' 'with modeled missing atoms/residues. ' 'This structure should be aligned to the ' 'input ligand!') parser.add_option_group(g) @classmethod def check_options(cls, idock, parser): if idock.opts.saxs_file: return cls(idock) def __init__(self, idock): Scorer.__init__(self, idock, "saxs_score") self.saxs_file = idock.opts.saxs_file self.saxs_receptor = idock.opts.saxs_receptor_pdb or idock.receptor self.saxs_ligand = idock.opts.saxs_ligand_pdb or idock.ligand def __str__(self): return "SAXS score" def _run_score_binary(self): _run_binary(None, "saxs_score", [self.saxs_receptor, self.saxs_ligand, self.transformations_file, self.saxs_file, '-o', self.output_file]) class EM2DScorer(Scorer): """Score transformations using EM2D""" short_name = 'em2d' @classmethod def add_parser_options(cls, parser): g = optparse.OptionGroup(parser, "EM2D") g.add_option('--em2d', metavar='FILE', action='append', default=[], dest='class_averages', help="File name of a complex 2D class average in PGM " "format. This option can be repeated to use " "multiple class averages (up to 5 in total)") g.add_option('--pixel_size', type='float', default=0., help='Pixel size for EM2D images') parser.add_option_group(g) @classmethod def check_options(cls, idock, parser): if idock.opts.class_averages: if idock.opts.pixel_size <= 0.: parser.error("please specify pixel size for 2D images with " "--pixel_size option") else: return cls(idock) def __init__(self, idock): Scorer.__init__(self, idock, "em2d_score") self.class_averages = idock.opts.class_averages self.pixel_size = idock.opts.pixel_size def __str__(self): return "EM2D score" def _run_score_binary(self): _run_binary(None, "em2d_score", [self.receptor, self.ligand, self.transformations_file] \ + self.class_averages \ + ['-o', self.output_file, '-n', '200', '-s', str(self.pixel_size)]) class EM3DScorer(Scorer): """Score transformations using EM3D""" short_name = 'em3d' transforms_needed = 1000 reverse_zscores = True @classmethod def add_parser_options(cls, parser): g = optparse.OptionGroup(parser, "EM3D") g.add_option('--em3d', metavar='FILE', dest='map_file', help="File name of the complex density map in mrc format") parser.add_option_group(g) @classmethod def check_options(cls, idock, parser): if idock.opts.map_file: return cls(idock) def __init__(self, idock): Scorer.__init__(self, idock, "em3d_score") self.map_file = idock.opts.map_file def __str__(self): return "EM3D score" def _run_score_binary(self): _run_binary(None, "em3d_score", [self.receptor, self.ligand, self.transformations_file, self.map_file, '-o', self.output_file, '-s']) class CXMSScorer(Scorer): """Score transformations using CXMS""" short_name = 'cxms' transforms_needed = 2000 reverse_zscores = True @classmethod def add_parser_options(cls, parser): g = optparse.OptionGroup(parser, "CXMS") g.add_option('--cxms', metavar='FILE', dest='cross_links_file', help="File name of the cross links file") parser.add_option_group(g) @classmethod def check_options(cls, idock, parser): if idock.opts.cross_links_file: return cls(idock) def __init__(self, idock): Scorer.__init__(self, idock, "cxms_score") self.cross_links_file = idock.opts.cross_links_file def __str__(self): return "CXMS score" def _run_score_binary(self): _run_binary(None, "cross_links_score", [self.receptor, self.ligand, self.transformations_file, self.cross_links_file, '-o', self.output_file]) class IDock(object): """Handle all stages of the integrative docking protocol""" # Get list of all Scorer subclasses _all_scorers = [x[1] for x in inspect.getmembers(sys.modules[__name__], lambda x: inspect.isclass(x) \ and issubclass(x, Scorer) and x is not Scorer)] def parse_args(self): """Parse command line options, and return all applicable Scorers""" data_types = [x.short_name.upper() for x in self._all_scorers] data_types = ", ".join(data_types[:-1]) + " and/or " + data_types[-1] epilog = """Note that in order to run this application, you must also have PatchDock and FiberDock (available from http://bioinfo3d.cs.tau.ac.il/FiberDock/) and reduce (available from http://kinemage.biochem.duke.edu/software/reduce.php) installed. See the --patch-dock and --fiber-dock parameters to tell idock where the PatchDock and FiberDock programs are installed. idock expects to find 'reduce' in the system path.""" parser = IMP.OptionParser(usage="%prog [options] " "", description="Integrative pairwise docking " "using %s data" % data_types, epilog=epilog, imp_module=IMP.saxs) choices=['EI', 'AA', 'other'] parser.add_option('--complex_type', metavar='TYPE', type='choice', dest="type", choices=choices, default='other', help='/'.join(choices) + '; use this order for ' 'receptor-ligand: ' 'antibody-antigen, enzyme-inhibitor') parser.add_option('--patch_dock', metavar='DIR', default=os.environ.get('PATCH_DOCK_HOME', None), help='Directory where PatchDock tools are installed. ' 'If not specified, the value of the ' 'PATCH_DOCK_HOME environment variable is used ' 'if set, otherwise the tools are assumed to be ' 'in the default path.') parser.add_option('--fiber_dock', metavar='DIR', default=os.environ.get('FIBER_DOCK_HOME', None), help='Directory where FiberDock tools are installed. ' 'If not specified, the value of the ' 'FIBER_DOCK_HOME environment variable is used ' 'if set, otherwise the tools are assumed to be ' 'in the default path.') parser.add_option('--prefix', default='', help='Add prefix string (separated by an underscore) ' 'to filenames generated by the current run') parser.add_option('--precision', type='choice', choices=['1', '2', '3'], default='1', help='Sampling precision for rigid docking: ' '1-normal, 2-medium, 3-high. The higher the ' 'precision, the higher are the run times') for s in self._all_scorers: s.add_parser_options(parser) opts, args = parser.parse_args() if len(args) != 2: parser.error("incorrect number of arguments") opts.precision = int(opts.precision) if opts.prefix: opts.prefix += '_' self.opts = opts self.receptor, self.ligand = args scorers = self.get_scorers(parser) if len(scorers) == 0: parser.error("please provide %s experimental data" % data_types) return scorers def get_scorers(self, parser): """Get all scorers that are applicable to the command line options""" scorers = [] for s in self._all_scorers: si = s.check_options(self, parser) if si: scorers.append(si) return scorers def run_patch_dock_binary(self, binary, args): """Run a binary that is part of the PatchDock distribution""" _run_binary(self.opts.patch_dock, binary, args) def run_fiber_dock_binary(self, binary, args): """Run a binary that is part of the FiberDock distribution""" _run_binary(self.opts.fiber_dock, binary, args) def make_patch_dock_surfaces(self): """Make molecular surfaces for PatchDock""" self.run_patch_dock_binary('buildMS.pl', [self.receptor, self.ligand]) def make_patch_dock_parameters(self): """Make parameter file for PatchDock""" if self.opts.precision == 1: rmsd = '4.0' else: rmsd = '2.0' if self.opts.precision == 3: script = 'buildParamsFine.pl' else: script = 'buildParams.pl' self.run_patch_dock_binary(script, [self.receptor, self.ligand, rmsd, self.opts.type]) def get_filename(self, fn): """Get a filename, with user-defined prefix if given""" return self.opts.prefix + fn def get_all_scores_filename(self, scorers, prefix, suffix): """Get a filename containing the names of all scores used""" return self.get_filename(prefix \ + '_'.join([x.short_name for x in scorers]) + suffix) def do_patch_dock_docking(self): """Do PatchDock docking, using previously generated surfaces and parameter files""" out_file = self.get_filename('docking.res') # Skip if PatchDock output file exists and contains transformations # (not just the header containing parameter information) if os.path.exists(out_file) and _count_lines(out_file) > 36: print "Skipping PatchDock for %s" % self.receptor else: self.run_patch_dock_binary('patch_dock.Linux', ['params.txt', out_file]) def make_transformation_file(self): """Extract transformation image from PatchDock output file""" out_file = self.get_filename('docking.res') num_re = re.compile('\d') num_transforms = 0 fout = open('trans_pd', 'w') for line in open(out_file): fields = line.split('|') if len(fields) > 1 and num_re.search(fields[0]): num_transforms += 1 print >> fout, int(fields[0]), fields[-1].strip(' \r\n') if self.opts.precision == 1 and num_transforms >= 5000: break return num_transforms def run_patch_dock(self): """Run PatchDock on the ligand and receptor""" self.make_patch_dock_surfaces() self.make_patch_dock_parameters() self.do_patch_dock_docking() num_transforms = self.make_transformation_file() # Swap ligand/receptor if we're doing AA if self.opts.type == 'AA': self.ligand, self.receptor = self.receptor, self.ligand self.opts.saxs_receptor_pdb, self.opts.saxs_ligand_pdb = \ self.opts.saxs_ligand_pdb, self.opts.saxs_receptor_pdb return num_transforms def get_filtered_scores(self, scorers): """Get scores that were filtered by an experimental method""" # If only one score, simply extract from its file (note we extract the # z-score, not the raw score, since the combined score is the weighted # sum of z-scores, not raw scores) if len(scorers) == 1: out_fh = open('trans_for_cluster', 'w') in_fh = open(scorers[0].output_file) for line in in_fh: spl = line.split('|') if '+' in line and '#' not in line and len(spl) > 1: out_fh.write("%s %s %s" % (spl[0], spl[3], spl[-1])) else: out_file = self.get_all_scores_filename(scorers, 'combined_', '.res') args = [] for s in scorers: args.extend([s.output_file, '1.0']) _run_binary(None, 'combine_scores', args, out_file=out_file) out_fh = open('trans_for_cluster', 'w') in_fh = open(out_file) for line in in_fh: spl = line.split('|') if '#' not in line and len(spl) > 1: out_fh.write("%s %s %s" % (spl[0], spl[1], spl[-1])) def get_clustered_transforms(self, scorers): """Cluster transformations with PatchDock""" out_file = self.get_all_scores_filename(scorers, 'clustered_', '.res') self.run_patch_dock_binary('interface_cluster.linux', [self.receptor + '.ms', self.ligand, 'trans_for_cluster', '4.0', out_file]) return out_file def get_transforms_for_fiber_dock(self, scorers, transforms_file): """Convert PatchDock-clustered transforms into FiberDock input""" trans_for_fd = self.get_all_scores_filename(scorers, 'trans_for_fd_', '') transforms_needed = min([s.transforms_needed for s in scorers]) fh_in = open(transforms_file) fh_out = open(trans_for_fd, 'w') for line in fh_in: spl = line.split('|') if len(spl) > 1: print >> fh_out, spl[0] + " " + spl[4].rstrip('\r\n') transforms_needed -= 1 if transforms_needed <= 0: break return trans_for_fd def add_hydrogens(self, in_pdb): """Run reduce to add hydrogens to in_pdb; return the name of the new PDB with hydrogens""" out_pdb = in_pdb + '.HB' _run_binary(None, 'reduce', ['-OH', '-HIS', '-NOADJ', '-NOROTMET', in_pdb], out_file=out_pdb) return out_pdb def make_fiber_dock_parameters(self, scorers, receptorH, ligandH, trans_for_fd): params = self.get_all_scores_filename(scorers, 'params_fd_', '.txt') fd_out = self.get_all_scores_filename(scorers, 'fd_res_', '') self.run_fiber_dock_binary('buildFiberDockParams.pl', [receptorH, ligandH, 'U', 'U', self.opts.type, trans_for_fd, fd_out, '0', '50', '0.8', '0', 'glpk', params]) return params, fd_out + '.ref' def do_fiber_dock_docking(self, trans_for_fd, params, fd_out): if os.path.exists(fd_out) \ and _count_fiber_dock_out(fd_out) == _count_lines(trans_for_fd): print "Skipping FiberDock for %s" % self.receptor return else: self.run_fiber_dock_binary('FiberDock', [params]) def convert_fiber_dock_to_score(self, scorers, fd_out): """Convert FiberDock or FireDock output file to the same format as the scorers""" out = self.get_all_scores_filename(scorers, 'fd_res_', '.res') solutions = [] fire_dock = False num_re = re.compile('\d') # Extract solutions for line in open(fd_out): spl = line.split('|') if len(spl) > 1 and num_re.search(spl[0]): num = int(spl[0]) if fire_dock: solutions.append((num, float(spl[5]), spl[-1].rstrip('\r\n'))) else: solutions.append((num, float(spl[1]), spl[-3])) elif len(spl) > 5 and 'glob' in spl[5]: fire_dock = True # Compute Z-score average = 0. stddev = 0. for solution in solutions: score = solution[1] average += score stddev += (score * score) average /= len(solutions) stddev = math.sqrt(stddev / len(solutions) - (average * average)) # Output in new format fh = open(out, 'w') print >> fh, " # | Score | filt| ZScore | Transformation" for solution in solutions: num, score, trans = solution zscore = (score - average) / stddev print >> fh, " %4d | %5.3f | + | %5.3f | %s" \ % (num, score, zscore, trans) return out def run_fiber_dock(self, scorers, transforms_file): trans_for_fd = self.get_transforms_for_fiber_dock(scorers, transforms_file) receptorH = self.add_hydrogens(self.receptor) ligandH = self.add_hydrogens(self.ligand) params, fd_out = self.make_fiber_dock_parameters(scorers, receptorH, ligandH, trans_for_fd) self.do_fiber_dock_docking(trans_for_fd, params, fd_out) return trans_for_fd, self.convert_fiber_dock_to_score(scorers, fd_out) def combine_final_scores(self, scorers, fd_out): """Combine scores for the final set with those from FireDock""" out_file = self.get_filename('combined_final.res') args = [] for s in scorers: args.extend([s.zscore_output_file, '1.0']) args.extend([fd_out, '1.0']) _run_binary(None, 'combine_scores', args, out_file=out_file) return out_file def write_results(self, scorers, comb_final): """Write final results file""" out_fname = self.get_all_scores_filename(scorers, 'results_', '.txt') # Get all solutions solutions = [] for line in open(comb_final): spl = line.split('|') if len(spl) > 1 and 'Score' not in line: solutions.append(spl) # Sort by z-score solutions.sort(key=lambda x: float(x[3])) # Write results file fh = open(out_fname, 'w') print >> fh, "receptorPdb Str " + self.receptor print >> fh, "ligandPdb Str " + self.ligand print >> fh, " # | Score | filt| ZScore |" \ + "|".join(["%-8s| Zscore " % s.short_name for s in scorers]) \ + "| Energy | Zscore | Transformation" for i, solution in enumerate(solutions): fh.write("%6d | " % (i+1) + "|".join(solution[1:])) return out_fname def main(self): """Run the entire protocol""" scorers = self.parse_args() num_transforms = self.run_patch_dock() for scorer in scorers: scorer.score(num_transforms) self.get_filtered_scores(scorers) transforms_file = self.get_clustered_transforms(scorers) trans_for_fd, fd_out = self.run_fiber_dock(scorers, transforms_file) for scorer in scorers: scorer.recompute_zscore(trans_for_fd) fh = self.combine_final_scores(scorers, fd_out) self.write_results(scorers, fh) if __name__ == "__main__": dock = IDock() dock.main()