Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/78597d31d423
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 01:15:35 2012
Кодировка:
allpy: 78597d31d423

allpy

changeset 784:78597d31d423

refactoring of pair_cores.py * add pdb-markup reading (user can provide pdb markup or alignment) * add CachedDownloadPdb as pdb_getter and allow change chache_dir * drop useless removing of monomers without structure (it should be already done by previous steps) * add messages to exceptions and documentation * rename run() to homology_from_3d() * hardening: raise if a structure at least one of sequences can't be loaded see #86 It seems not to work with markups (returns no blocks)
author boris (kodomo) <bnagaev@gmail.com>
date Tue, 12 Jul 2011 23:57:18 +0400
parents 84ac7e748e26
children 6c0d6d3ceafe b6c5178a4337
files pair_cores/pair_cores.py
diffstat 1 files changed, 59 insertions(+), 61 deletions(-) [+]
line diff
     1.1 --- a/pair_cores/pair_cores.py	Wed Jul 13 00:41:11 2011 +0400
     1.2 +++ b/pair_cores/pair_cores.py	Tue Jul 12 23:57:18 2011 +0400
     1.3 @@ -8,88 +8,86 @@
     1.4  from copy import copy
     1.5  
     1.6  from protein_pdb import Alignment, Block, Monomer, Sequence
     1.7 -from allpy import processors
     1.8 +from allpy import processors, markups
     1.9  import allpy.base
    1.10  from html import html_template
    1.11 +from allpy.structure import CachedDownloadPdb, cached_download_pdb
    1.12  
    1.13 -def remove_monomers_without_structure(block):
    1.14 -    monomers_with_structure = set()
    1.15 +for path in list(sys.path):
    1.16 +    if 'allpy' in path:
    1.17 +        sys.path.append(path + '/utils')
    1.18 +try:
    1.19 +    import extract_pfam
    1.20 +except ImportError:
    1.21 +    print "source SETPATH script!"
    1.22 +    sys.exit()
    1.23  
    1.24 -    for sequence in block.sequences:
    1.25 -        for monomer in sequence:
    1.26 -            if hasattr(monomer, 'ca_xyz'):
    1.27 -                monomers_with_structure.add(monomer)
    1.28 +markups.SequencePdbResiMarkup = extract_pfam.SequencePdbResiMarkup
    1.29  
    1.30 -    for sequence in block.sequences:
    1.31 -        to_delete = []
    1.32 -        for i, monomer in enumerate(sequence):
    1.33 -            if monomer not in monomers_with_structure:
    1.34 -                to_delete.append(i)
    1.35 -        for n, i in enumerate(to_delete):
    1.36 -            assert not hasattr(sequence.pop(i-n), 'ca_xyz')
    1.37 +def homology_from_3d(markup_file, homology_file, max_delta, alignment_file=None,
    1.38 +    out_alignment_file=None, out_pair_cores_file=None, out_html_file=None,
    1.39 +    pdb_getter=cached_download_pdb):
    1.40 +    """ Turn pdb markup into homology_file
    1.41  
    1.42 -    for column in block.columns:
    1.43 -        for s, m in column.items():
    1.44 -            if m not in monomers_with_structure:
    1.45 -                assert not hasattr(column.pop(s), 'ca_xyz')
    1.46 -
    1.47 -def run(args):
    1.48 -
    1.49 +    * markup_file -- file with pdb markup of alignment
    1.50 +    * homology_file -- output file with homology_file
    1.51 +    * alignment_file -- file with alignment_file (do not use with markup_file)
    1.52 +      Identifiers of pdb residues are calculated using pairwise alignment.
    1.53 +    * out_alignment_file -- output file with alignment (backwards-compatibility)
    1.54 +    * out_pair_cores_file -- output file with pair geomatrical cores
    1.55 +    * out_html_file -- output file with HTML representation of blocks
    1.56 +    * pdb_getter -- see structure.SequenceMixin.auto_pdb
    1.57 +    """
    1.58 +    if markup_file:
    1.59 +        input_file = markup_file
    1.60 +        input_file_format = 'markup'
    1.61 +    elif alignment_file:
    1.62 +        input_file = alignment_file
    1.63 +        input_file_format = 'fasta'
    1.64 +    else:
    1.65 +        raise Exception("Provide input alignment or markup file")
    1.66      try:
    1.67 -        alignment = Alignment().append_file(args.i)
    1.68 +        alignment = Alignment().append_file(input_file, format=input_file_format)
    1.69      except:
    1.70 -        raise Exception()
    1.71 -
    1.72 -    unique_sequences = list(set([s.name for s in alignment.sequences]))
    1.73 -    if len(unique_sequences) < len(alignment.sequences):
    1.74 -        alignment.sequences = unique_sequences
    1.75 -
    1.76 -    bad_sequences = set()
    1.77 +        raise Exception("Can not read input file %s" % input_file)
    1.78 +    unique_sequence_names = set([s.name for s in alignment.sequences])
    1.79 +    if len(unique_sequence_names) != len(alignment.sequences):
    1.80 +        raise Exception("Duplicate sequence names in %s" % input_file)
    1.81      for sequence in copy(alignment.sequences):
    1.82          try:
    1.83 -            sequence.auto_pdb(xyz_only=True)
    1.84 +            if markup_file:
    1.85 +                sequence.markups['pdb_resi'].add_pdb()
    1.86 +            else:
    1.87 +                sequence.auto_pdb(xyz_only=True, pdb_getter=pdb_getter)
    1.88          except:
    1.89 -            bad_sequences.add(sequence)
    1.90 -    if bad_sequences:
    1.91 -        alignment.sequences = list(set(alignment.sequences)-bad_sequences)
    1.92 -
    1.93 +            raise Exception("Can't load structure for sequence %s from file %s" % (sequence.name, input_file))
    1.94      if len(alignment.sequences) < 2:
    1.95 -        raise Exception()
    1.96 -
    1.97 +        raise Exception("Alignment from file %s has too few sequences" % input_file)
    1.98      block = Block.from_alignment(alignment)
    1.99 -    remove_monomers_without_structure(block)
   1.100 -
   1.101 -    blocks = block.pair_core_parts(max_delta=args.d, timeout=0)
   1.102 -
   1.103 -    alignment.blocks_to_homology(args.y, blocks)
   1.104 -    if args.o:
   1.105 -        block.to_file(args.o)
   1.106 -    if args.b:
   1.107 -        block.blocks_to_file(args.b, blocks)
   1.108 -    if args.H:
   1.109 -        block.blocks_to_html(args.H, blocks, open(html_template).read())
   1.110 -
   1.111 -class A(object):
   1.112 -    pass
   1.113 +    blocks = block.pair_core_parts(max_delta=max_delta, timeout=0)
   1.114 +    alignment.blocks_to_homology(homology_file, blocks)
   1.115 +    if out_alignment_file:
   1.116 +        block.to_file(out_alignment_file)
   1.117 +    if out_pair_cores_file:
   1.118 +        block.blocks_to_file(out_pair_cores_file, blocks)
   1.119 +    if out_html_file:
   1.120 +        block.blocks_to_html(out_html_file, blocks, open(html_template).read())
   1.121  
   1.122  if __name__ == '__main__':
   1.123 -
   1.124      r = argparse.FileType('r')
   1.125      w = argparse.FileType('w')
   1.126 -
   1.127 -    p = argparse.ArgumentParser(
   1.128 -    description='PairCores',
   1.129 -    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
   1.130 -    )
   1.131 -
   1.132 +    p = argparse.ArgumentParser(description='PairCores')
   1.133      p.add_argument('-v','--version',action='version',version='%(prog)s 2.0')
   1.134 -    p.add_argument('-i',help='Input alignment file',metavar='FILE',type=r,required=True)
   1.135 +    p.add_argument('-m',help='Input pdb markup file',metavar='FILE',type=r)
   1.136 +    p.add_argument('-i',help='Input alignment file',metavar='FILE',type=r)
   1.137      p.add_argument('-y',help='Output homology file',metavar='FILE',type=w, required=True)
   1.138 +    p.add_argument('-c',help='Pdb cache directory',metavar='FILE',type=str, default='pdb_cache')
   1.139      p.add_argument('-d',help='Distance spreading',metavar='float',type=float,default=2.0)
   1.140      p.add_argument('-o',help='Output alignment file',metavar='FILE',type=w)
   1.141      p.add_argument('-b',help='Output pair_cores file',metavar='FILE',type=w)
   1.142      p.add_argument('-H',help='Output HTML file',metavar='FILE',type=w)
   1.143 +    args = p.parse_args()
   1.144 +    homology_from_3d(markup_file=args.m, homology_file=args.y, max_delta=args.d,
   1.145 +        alignment_file=args.i, out_alignment_file=args.o, out_pair_cores_file=args.b,
   1.146 +        out_html_file=args.H, pdb_getter=CachedDownloadPdb(cache_dir=args.c))
   1.147  
   1.148 -    args = p.parse_args()
   1.149 -    run(args)
   1.150 -