allpy
changeset 792:d0a4eb3fb962
pair_cores_all: upgrade to current pair_cores
TODO:
* documentation
? skip input if output exists
? continue processing other tasks on exception
Was tested with file pfam_PF00001
author | boris (kodomo) <bnagaev@gmail.com> |
---|---|
date | Wed, 13 Jul 2011 03:31:06 +0400 |
parents | 559485da1951 |
children | 0d6c1227ede8 |
files | pair_cores/pair_cores_all.py |
diffstat | 1 files changed, 58 insertions(+), 41 deletions(-) [+] |
line diff
```
1.1 --- a/pair_cores/pair_cores_all.py	Wed Jul 13 02:19:40 2011 +0400
1.2 +++ b/pair_cores/pair_cores_all.py	Wed Jul 13 03:31:06 2011 +0400
1.3 @@ -1,49 +1,66 @@
1.4  #!/usr/bin/python
1.5
1.6 -import sys
1.7  import os
1.8 -from multiprocessing import Pool
1.9 -from copy import copy
1.10 +import multiprocessing as mp
1.11 +import argparse
1.12
1.13 -from pair_cores import run, A
1.14 +from pair_cores import homology_from_3d, CachedDownloadPdb, cached_download_pdb
1.15
1.16 -d_in = sys.argv[1]
1.17 -d_out = sys.argv[2]
1.18 -name = sys.argv[3]
1.19 -cpu_count = int(sys.argv[4])
1.20 +def homology_from_markup(markup_filename, homology_filename,
1.21 +        max_delta, pdb_getter=cached_download_pdb):
1.22 +    markup_file = open(markup_filename)
1.23 +    homology_file = open(homology_filename, 'w')
1.24 +    homology_from_3d(markup_file=markup_file, homology_file=homology_file,
1.25 +        max_delta=max_delta, pdb_getter=pdb_getter)
1.26 +    markup_file.close()
1.27 +    homology_file.close()
1.28
1.29 -def run2(args):
1.30 -    print args.i
1.31 -    args.i = open(args.i)
1.32 -    args.o = open(args.o, 'w')
1.33 -    args.H = open(args.H, 'w')
1.34 -    args.b = open(args.b, 'w')
1.35 +class Binder(object):
1.36 +    def __init__(self, func, **kwargs):
1.37 +        self.func = func
1.38 +        self.kwargs = kwargs
1.39 +    def __call__(self):
1.40 +        self.func(**self.kwargs)
1.41 +
1.42 +def homology_from_markup_all(input_dir, output_dir, max_delta, list_file=None,
1.43 +        suffix=".homology.xls", pdb_getter=cached_download_pdb, cpu_count=0):
1.44 +    """ FIXME """
1.45 +    if list_file:
1.46 +        files = open(list_file)
1.47 +    else:
1.48 +        files = os.listdir(input_dir)
1.49 +    tasks = []
1.50 +    for file in files:
1.51 +        file = file.strip()
1.52 +        path = os.path.join(input_dir, file)
1.53 +        if os.path.isfile(path):
1.54 +            tasks.append(Binder(homology_from_markup, markup_filename=path,
1.55 +                homology_filename=os.path.join(output_dir, file+suffix),
1.56 +                max_delta=max_delta, pdb_getter=pdb_getter))
1.57 +    cpu_count = cpu_count or mp.cpu_count()
1.58 +    pool = mp.Pool(processes=cpu_count)
1.59 +    pool.map(apply, tasks)
1.60 +
1.61 +def main():
1.62 +    r = argparse.FileType('r')
1.63 +    p = argparse.ArgumentParser(description='Pdb markups to homology converter',
1.64 +        epilog='If List of names is not provided, uses all regular files of given directory',
1.65 +        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
1.66 +    p.add_argument('-v','--version',action='version',version='%(prog)s 2.0')
1.67 +    p.add_argument('-i',help='Directory with markups',metavar='DIR',type=str,required=True)
1.68 +    p.add_argument('-o',help='Directory to write output homology files',metavar='DIR',type=str,required=True)
1.69 +    p.add_argument('-l',help='List of names input files (relative to -i)',metavar='FILE',type=r,required=False)
1.70 +    p.add_argument('-d',help='Distance spreading',metavar='float',type=float,default=2.0)
1.71 +    p.add_argument('-s',help='Suffix added to names of output files',metavar='STR',type=str,default=".homology.xls")
1.72 +    p.add_argument('-c',help='Pdb cache directory',metavar='DIR',type=str, default='pdb_cache')
1.73 +    p.add_argument('-j',help='Number of workers',metavar='INT',type=int, default=2)
1.74 +    args = p.parse_args()
1.75 +    homology_from_markup_all(input_dir=args.i, output_dir=args.o, max_delta=args.d, list_file=args.l,
1.76 +        suffix=args.s, pdb_getter=CachedDownloadPdb(cache_dir=args.c), cpu_count=args.j)
1.77 +
1.78 +if __name__ == '__main__':
1.79      try:
1.80 -        run(args)
1.81 -    except:
1.82 -        pass
1.83 -    args.i.close()
1.84 -    args.o.close()
1.85 -    args.H.close()
1.86 -    args.b.close()
1.87 +        main()
1.88 +    except Exception, e:
1.89 +        print e
1.90
1.91 -def tasks():
1.92 -    for file in os.listdir(d_in):
1.93 -        args = A()
1.94 -        args.i = os.path.join(d_in, file)
1.95 -        Dir = os.path.join(d_out, file)
1.96 -        if not os.path.exists(Dir):
1.97 -            os.mkdir(Dir)
1.98 -        args.o = os.path.join(Dir, 'structure_only.fasta')
1.99 -        for d in [2.0,2.5]:
1.100 -            args.d = d
1.101 -            args.H = os.path.join(Dir, '%s-%.1f.html'%(name,d))
1.102 -            blocks_filename = os.path.join(Dir, '%s-%.1f.blocks'%(name,d))
1.103 -            args.b = blocks_filename
1.104 -            if not os.path.exists(blocks_filename) or os.path.getsize(blocks_filename) < 10:
1.105 -                yield copy(args)
1.106 -
1.107 -pool = Pool(processes=cpu_count)
1.108 -pool.map(run2, tasks())
1.109 -
1.110 -
```