Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/d0a4eb3fb962
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 01:16:54 2012
Кодировка:
allpy: d0a4eb3fb962

allpy

changeset 792:d0a4eb3fb962

pair_cores_all: upgrade to current pair_cores. TODO: * documentation; ? skip input if output exists; ? continue processing other tasks on exception. Was tested with file pfam_PF00001.
author boris (kodomo) <bnagaev@gmail.com>
date Wed, 13 Jul 2011 03:31:06 +0400
parents 559485da1951
children 0d6c1227ede8
files pair_cores/pair_cores_all.py
diffstat 1 files changed, 58 insertions(+), 41 deletions(-) [+]
line diff
     1.1 --- a/pair_cores/pair_cores_all.py	Wed Jul 13 02:19:40 2011 +0400
     1.2 +++ b/pair_cores/pair_cores_all.py	Wed Jul 13 03:31:06 2011 +0400
     1.3 @@ -1,49 +1,66 @@
     1.4  #!/usr/bin/python
     1.5  
     1.6 -import sys
     1.7  import os
     1.8 -from multiprocessing import Pool
     1.9 -from copy import copy
    1.10 +import multiprocessing as mp
    1.11 +import argparse
    1.12  
    1.13 -from pair_cores import run, A
    1.14 +from pair_cores import homology_from_3d, CachedDownloadPdb, cached_download_pdb
    1.15  
    1.16 -d_in = sys.argv[1]
    1.17 -d_out = sys.argv[2]
    1.18 -name = sys.argv[3]
    1.19 -cpu_count = int(sys.argv[4])
    1.20 +def homology_from_markup(markup_filename, homology_filename,
    1.21 +        max_delta, pdb_getter=cached_download_pdb):
    1.22 +    markup_file = open(markup_filename)
    1.23 +    homology_file = open(homology_filename, 'w')
    1.24 +    homology_from_3d(markup_file=markup_file, homology_file=homology_file,
    1.25 +        max_delta=max_delta, pdb_getter=pdb_getter)
    1.26 +    markup_file.close()
    1.27 +    homology_file.close()
    1.28  
    1.29 -def run2(args):
    1.30 -    print args.i
    1.31 -    args.i = open(args.i)
    1.32 -    args.o = open(args.o, 'w')
    1.33 -    args.H = open(args.H, 'w')
    1.34 -    args.b = open(args.b, 'w')
    1.35 +class Binder(object):
    1.36 +    def __init__(self, func, **kwargs):
    1.37 +        self.func = func
    1.38 +        self.kwargs = kwargs
    1.39 +    def __call__(self):
    1.40 +        self.func(**self.kwargs)
    1.41 +
    1.42 +def homology_from_markup_all(input_dir, output_dir, max_delta, list_file=None,
    1.43 +        suffix=".homology.xls", pdb_getter=cached_download_pdb, cpu_count=0):
    1.44 +    """ FIXME """
    1.45 +    if list_file:
    1.46 +        files = open(list_file)
    1.47 +    else:
    1.48 +        files = os.listdir(input_dir)
    1.49 +    tasks = []
    1.50 +    for file in files:
    1.51 +        file = file.strip()
    1.52 +        path = os.path.join(input_dir, file)
    1.53 +        if os.path.isfile(path):
    1.54 +            tasks.append(Binder(homology_from_markup, markup_filename=path,
    1.55 +                homology_filename=os.path.join(output_dir, file+suffix),
    1.56 +                max_delta=max_delta, pdb_getter=pdb_getter))
    1.57 +    cpu_count = cpu_count or mp.cpu_count()
    1.58 +    pool = mp.Pool(processes=cpu_count)
    1.59 +    pool.map(apply, tasks)
    1.60 +
    1.61 +def main():
    1.62 +    r = argparse.FileType('r')
    1.63 +    p = argparse.ArgumentParser(description='Pdb markups to homology converter',
    1.64 +        epilog='If List of names is not provided, uses all regular files of given directory',
    1.65 +        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    1.66 +    p.add_argument('-v','--version',action='version',version='%(prog)s 2.0')
    1.67 +    p.add_argument('-i',help='Directory with markups',metavar='DIR',type=str,required=True)
    1.68 +    p.add_argument('-o',help='Directory to write output homology files',metavar='DIR',type=str,required=True)
    1.69 +    p.add_argument('-l',help='List of names input files (relative to -i)',metavar='FILE',type=r,required=False)
    1.70 +    p.add_argument('-d',help='Distance spreading',metavar='float',type=float,default=2.0)
    1.71 +    p.add_argument('-s',help='Suffix added to names of output files',metavar='STR',type=str,default=".homology.xls")
    1.72 +    p.add_argument('-c',help='Pdb cache directory',metavar='DIR',type=str, default='pdb_cache')
    1.73 +    p.add_argument('-j',help='Number of workers',metavar='INT',type=int, default=2)
    1.74 +    args = p.parse_args()
    1.75 +    homology_from_markup_all(input_dir=args.i, output_dir=args.o, max_delta=args.d, list_file=args.l,
    1.76 +        suffix=args.s, pdb_getter=CachedDownloadPdb(cache_dir=args.c), cpu_count=args.j)
    1.77 +
    1.78 +if __name__ == '__main__':
    1.79      try:
    1.80 -        run(args)
    1.81 -    except:
    1.82 -        pass
    1.83 -    args.i.close()
    1.84 -    args.o.close()
    1.85 -    args.H.close()
    1.86 -    args.b.close()
    1.87 +        main()
    1.88 +    except Exception, e:
    1.89 +        print e
    1.90  
    1.91 -def tasks():
    1.92 -    for file in os.listdir(d_in):
    1.93 -        args = A()
    1.94 -        args.i = os.path.join(d_in, file)
    1.95 -        Dir = os.path.join(d_out, file)
    1.96 -        if not os.path.exists(Dir):
    1.97 -            os.mkdir(Dir)
    1.98 -        args.o = os.path.join(Dir, 'structure_only.fasta')
    1.99 -        for d in [2.0,2.5]:
   1.100 -            args.d = d
   1.101 -            args.H = os.path.join(Dir, '%s-%.1f.html'%(name,d))
   1.102 -            blocks_filename = os.path.join(Dir, '%s-%.1f.blocks'%(name,d))
   1.103 -            args.b = blocks_filename
   1.104 -            if not os.path.exists(blocks_filename) or os.path.getsize(blocks_filename) < 10:
   1.105 -                yield copy(args)
   1.106 -
   1.107 -pool = Pool(processes=cpu_count)
   1.108 -pool.map(run2, tasks())
   1.109 -
   1.110 -