allpy

changeset 1045:7477ef0120e1
Automated merge with ssh://kodomo.fbb.msu.ru/allpy
author: Boris Burkov <BurkovBA@gmail.com>
date: Fri, 23 Mar 2012 16:52:31 +0400
parents: 811914ba7195 370678689947
children: 3cc7ef543da5
files: allpy/base.py
diffstat: 2 files changed, 85 insertions(+), 13 deletions(-) [+]
[-]

allpy/base.py 1

sequence_based_blocks_search/blocks_finder.py 97 allpy/base.py 1 sequence_based_blocks_search/blocks_finder.py 97
allpy/base.py 1
sequence_based_blocks_search/blocks_finder.py 97
     1.1 --- a/allpy/base.py	Fri Mar 23 01:27:40 2012 +0400
     1.2 +++ b/allpy/base.py	Fri Mar 23 16:52:31 2012 +0400
     1.3 @@ -512,6 +512,7 @@
     1.4          """Return hash by identity."""
     1.5          return id(self)
     1.6  
     1.7 +
     1.8  class Block(Alignment):
     1.9      """Block of alignment.
    1.10  

     2.1 --- a/sequence_based_blocks_search/blocks_finder.py	Fri Mar 23 01:27:40 2012 +0400
     2.2 +++ b/sequence_based_blocks_search/blocks_finder.py	Fri Mar 23 16:52:31 2012 +0400
     2.3 @@ -4,10 +4,47 @@
     2.4  sys.path.append("../")
     2.5  from allpy import protein
     2.6  from allpy.homology import *
     2.7 +from allpy import data
     2.8 +from allpy import base
     2.9  import copy
    2.10  import math
    2.11  from functional_groups import functional_groups
    2.12  from allpy.util import *
    2.13 +from optparse import OptionParser
    2.14 +
    2.15 +
    2.16 +class SbbsProteinMonomer(protein.Monomer):
    2.17 +    def __eq__(self, other):
    2.18 +        return self is other
    2.19 +
    2.20 +    def __ne__(self, other):
    2.21 +        return not self is other
    2.22 +
    2.23 +SbbsProteinMonomer._initialize(data.codes.protein)
    2.24 +
    2.25 +class SbbsProteinAlignment(protein.Alignment):
    2.26 +    def rows_as_strings(self, gaps='-'):
    2.27 +        rows = []
    2.28 +        for sequence in self.sequences:
    2.29 +            string = ""
    2.30 +            for column in self.columns:
    2.31 +                if sequence in column:
    2.32 +                    if hasattr(column[sequence], "input_code1"):
    2.33 +                        string += column[sequence].input_code1
    2.34 +                    else:
    2.35 +                        string += column[sequence].code1
    2.36 +                else:
    2.37 +                    string += gap
    2.38 +            string = util.UserString(string)
    2.39 +            string.sequence = sequence
    2.40 +            rows.append(string)
    2.41 +        return rows
    2.42 +
    2.43 +class SbbsProteinColumn(base.Column):
    2.44 +    def __eq__(self, other):
    2.45 +        return self is other
    2.46 +
    2.47 +vars(base)['Column'] = SbbsProteinColumn
    2.48  
    2.49  class Preblock(object):
    2.50      """Attributes:
    2.51 @@ -51,7 +88,10 @@
    2.52  
    2.53  
    2.54  def main(file):
    2.55 -    alignment = protein.Alignment().append_file(file)
    2.56 +    try:
    2.57 +        alignment = SbbsProteinAlignment().append_file(file)
    2.58 +    except Exception as E:
    2.59 +        raise Exception("Failed to read alignment\nCheck alignment format")
    2.60      groups_content = find_groups_content(alignment, functional_groups)
    2.61      find_links(groups_content, alignment)
    2.62      blocks = mark_blocks(alignment)
    2.63 @@ -88,7 +128,7 @@
    2.64                      addition_to_candidate_preblocks.append(Preblock(index_of_column, index_of_column, copy.copy(groups_content[column][group_name]), functional_groups[group_name]["70%"], functional_groups[group_name]["100%"], 0.0, alignment))
    2.65          except KeyError as ke:
    2.66              print "Found non-canonical aminoacid: %s"%(ke)
    2.67 -        #create new preblocks as combinations of the old ones 
    2.68 +        #create new preblocks as combinations of the old ones
    2.69          try:
    2.70              for preblock in candidate_preblocks:
    2.71                  preblock_extensions = []
    2.72 @@ -117,7 +157,7 @@
    2.73          overlapping_preblocks=[]
    2.74          for preblock in candidate_preblocks:
    2.75              if preblock.alignment.columns[preblock.last_column_index] is column and preblock.calculate_score() > calculate_threshold(len(preblock.sequences), len(alignment.sequences), functional_groups):
    2.76 -                create_links(preblock, alignment) #links are not cliques, just connected components 
    2.77 +                create_links(preblock, alignment) #links are not cliques, just connected components
    2.78                  overlapping_preblocks+=accept_and_return_overlapping_preblocks_before(preblock, alignment, candidate_preblocks)
    2.79              if preblock not in overlapping_preblocks: new_candidate_preblocks.append(preblock)
    2.80          candidate_preblocks = new_candidate_preblocks
    2.81 @@ -218,7 +258,7 @@
    2.82      inconserved positions, create the transition matrix, calculate
    2.83      its eigenvectors and eigenvalues, round the weights, find coefficient
    2.84      of [1,0,0,...] decomposition in the basis of eigenvectors.
    2.85 -    
    2.86 +
    2.87  
    2.88      """
    2.89  
    2.90 @@ -244,13 +284,13 @@
    2.91  #                    column[iterated_sequence].links=[sequence]
    2.92  #                else:
    2.93  #                    if sequence not in column[iterated_sequence].links: column[iterated_sequence].links.append(sequence)
    2.94 -       
    2.95 +
    2.96          if "links" not in column[any_sequence].__dict__:
    2.97              column[any_sequence].links=[]
    2.98          remaining_sequences = copy.copy(preblock.sequences)
    2.99          remaining_sequences.remove(any_sequence)
   2.100          for sequence in preblock.sequences:#links are bidirectional
   2.101 -            if sequence not in column[any_sequence].links: 
   2.102 +            if sequence not in column[any_sequence].links:
   2.103                  column[any_sequence].links.append(sequence)
   2.104                  if "links" not in column[sequence].__dict__:
   2.105                      column[sequence].links=[any_sequence]
   2.106 @@ -304,8 +344,8 @@
   2.107          #print index_of_column, [gblock.columns for gblock in blocks_in_this_column]
   2.108          if alignment.columns.index(column) == len(alignment.columns)-1: output+=blocks_in_this_column
   2.109      return output
   2.110 -               
   2.111 -                
   2.112 +
   2.113 +
   2.114  def find_connected_component(sequences, column):
   2.115      """output is a list of sequences"""
   2.116      #find one sequence that has blocks
   2.117 @@ -326,7 +366,7 @@
   2.118                  if another_sequence not in queue and another_sequence not in output and another_sequence in sequences:
   2.119                      queue.append(another_sequence)
   2.120      return output
   2.121 -                
   2.122 +
   2.123  
   2.124  def delete_links(alignment):
   2.125      for sequence in alignment.sequences:
   2.126 @@ -334,7 +374,7 @@
   2.127              if "links" in monomer.__dict__:
   2.128                  del monomer.links
   2.129  
   2.130 -#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147]   
   2.131 +#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147]
   2.132  
   2.133  def create_monomer_homology(alignment):
   2.134      """returns MonomerHomology object, given alignment
   2.135 @@ -380,8 +420,16 @@
   2.136          sys.exit()
   2.137      #creating markups
   2.138      for sequence in alignment.sequences:
   2.139 -        sequence.add_markup('index')
   2.140 -    aim = alignment.add_markup('index')
   2.141 +        if 'markups' in sequence.__dict__:
   2.142 +            if 'index' in sequence.markups:
   2.143 +                pass
   2.144 +            else:
   2.145 +                sequence.add_markup('index')
   2.146 +    if 'markups' in alignment.__dict__:
   2.147 +        if 'index' in alignment.markups:
   2.148 +            aim = alignment.markups['index']
   2.149 +        else:
   2.150 +            aim = alignment.add_markup('index')
   2.151      #inferring classes_of_equivalence = homologous monomers = connected_components from links
   2.152      class_number = 0
   2.153      for column in alignment.columns:
   2.154 @@ -408,4 +456,27 @@
   2.155  
   2.156  
   2.157  if __name__== '__main__':
   2.158 -    main(open(sys.argv[1]))
   2.159 +    usage = "usage: %prog [options] arg"
   2.160 +    parser = OptionParser(usage)
   2.161 +    parser.add_option("-c", "--classes", dest="output_classes", help="Name of optional output file with homology classes")
   2.162 +    (options, args) = parser.parse_args()
   2.163 +    input_file = open(args[0])
   2.164 +    blocks = main(input_file)
   2.165 +    if blocks == []:
   2.166 +        sys.exit()
   2.167 +    aim = blocks[0].alignment.add_markup('index') #create markup
   2.168 +    file_content=""
   2.169 +    for index, block in enumerate(blocks):
   2.170 +        for sequence in block.sequences:
   2.171 +            file_content+=str(str(args[1]))+"\t"+str(index)+"\t"+str(sequence.name)+"\t"
   2.172 +            buffer=""
   2.173 +            for column in block.columns:
   2.174 +                buffer+=str(aim[column])+" "
   2.175 +            if (len(buffer)!=0): buffer=buffer[:-1]
   2.176 +            file_content+=buffer+"\n"
   2.177 +
   2.178 +    output_file = open(args[1], 'w')
   2.179 +    output_file.write(file_content)
   2.180 +
   2.181 +    if options.output_classes:
   2.182 +        create_file_with_monomer_homology(blocks[0].alignment, options.output_classes)