allpy
changeset 1045:7477ef0120e1
Automated merge with ssh://kodomo.fbb.msu.ru/allpy
author | Boris Burkov <BurkovBA@gmail.com> |
---|---|
date | Fri, 23 Mar 2012 16:52:31 +0400 |
parents | 811914ba7195 370678689947 |
children | 3cc7ef543da5 |
files | allpy/base.py |
diffstat | 2 files changed, 85 insertions(+), 13 deletions(-) [+] |
line diff
1.1 --- a/allpy/base.py Fri Mar 23 01:27:40 2012 +0400 1.2 +++ b/allpy/base.py Fri Mar 23 16:52:31 2012 +0400 1.3 @@ -512,6 +512,7 @@ 1.4 """Return hash by identity.""" 1.5 return id(self) 1.6 1.7 + 1.8 class Block(Alignment): 1.9 """Block of alignment. 1.10
2.1 --- a/sequence_based_blocks_search/blocks_finder.py Fri Mar 23 01:27:40 2012 +0400 2.2 +++ b/sequence_based_blocks_search/blocks_finder.py Fri Mar 23 16:52:31 2012 +0400 2.3 @@ -4,10 +4,47 @@ 2.4 sys.path.append("../") 2.5 from allpy import protein 2.6 from allpy.homology import * 2.7 +from allpy import data 2.8 +from allpy import base 2.9 import copy 2.10 import math 2.11 from functional_groups import functional_groups 2.12 from allpy.util import * 2.13 +from optparse import OptionParser 2.14 + 2.15 + 2.16 +class SbbsProteinMonomer(protein.Monomer): 2.17 + def __eq__(self, other): 2.18 + return self is other 2.19 + 2.20 + def __ne__(self, other): 2.21 + return not self is other 2.22 + 2.23 +SbbsProteinMonomer._initialize(data.codes.protein) 2.24 + 2.25 +class SbbsProteinAlignment(protein.Alignment): 2.26 + def rows_as_strings(self, gaps='-'): 2.27 + rows = [] 2.28 + for sequence in self.sequences: 2.29 + string = "" 2.30 + for column in self.columns: 2.31 + if sequence in column: 2.32 + if hasattr(column[sequence], "input_code1"): 2.33 + string += column[sequence].input_code1 2.34 + else: 2.35 + string += column[sequence].code1 2.36 + else: 2.37 + string += gap 2.38 + string = util.UserString(string) 2.39 + string.sequence = sequence 2.40 + rows.append(string) 2.41 + return rows 2.42 + 2.43 +class SbbsProteinColumn(base.Column): 2.44 + def __eq__(self, other): 2.45 + return self is other 2.46 + 2.47 +vars(base)['Column'] = SbbsProteinColumn 2.48 2.49 class Preblock(object): 2.50 """Attributes: 2.51 @@ -51,7 +88,10 @@ 2.52 2.53 2.54 def main(file): 2.55 - alignment = protein.Alignment().append_file(file) 2.56 + try: 2.57 + alignment = SbbsProteinAlignment().append_file(file) 2.58 + except Exception as E: 2.59 + raise Exception("Failed to read alignment\nCheck alignment format") 2.60 groups_content = find_groups_content(alignment, functional_groups) 2.61 find_links(groups_content, alignment) 2.62 blocks = mark_blocks(alignment) 2.63 @@ -88,7 +128,7 @@ 2.64 addition_to_candidate_preblocks.append(Preblock(index_of_column, index_of_column, copy.copy(groups_content[column][group_name]), functional_groups[group_name]["70%"], functional_groups[group_name]["100%"], 0.0, alignment)) 2.65 except KeyError as ke: 2.66 print "Found non-canonical aminoacid: %s"%(ke) 2.67 - #create new preblocks as combinations of the old ones 2.68 + #create new preblocks as combinations of the old ones 2.69 try: 2.70 for preblock in candidate_preblocks: 2.71 preblock_extensions = [] 2.72 @@ -117,7 +157,7 @@ 2.73 overlapping_preblocks=[] 2.74 for preblock in candidate_preblocks: 2.75 if preblock.alignment.columns[preblock.last_column_index] is column and preblock.calculate_score() > calculate_threshold(len(preblock.sequences), len(alignment.sequences), functional_groups): 2.76 - create_links(preblock, alignment) #links are not cliques, just connected components 2.77 + create_links(preblock, alignment) #links are not cliques, just connected components 2.78 overlapping_preblocks+=accept_and_return_overlapping_preblocks_before(preblock, alignment, candidate_preblocks) 2.79 if preblock not in overlapping_preblocks: new_candidate_preblocks.append(preblock) 2.80 candidate_preblocks = new_candidate_preblocks 2.81 @@ -218,7 +258,7 @@ 2.82 inconserved positions, create the transition matrix, calculate 2.83 its eigenvectors and eigenvalues, round the weights, find coefficient 2.84 of [1,0,0,...] decomposition in the basis of eigenvectors. 2.85 - 2.86 + 2.87 2.88 """ 2.89 2.90 @@ -244,13 +284,13 @@ 2.91 # column[iterated_sequence].links=[sequence] 2.92 # else: 2.93 # if sequence not in column[iterated_sequence].links: column[iterated_sequence].links.append(sequence) 2.94 - 2.95 + 2.96 if "links" not in column[any_sequence].__dict__: 2.97 column[any_sequence].links=[] 2.98 remaining_sequences = copy.copy(preblock.sequences) 2.99 remaining_sequences.remove(any_sequence) 2.100 for sequence in preblock.sequences:#links are bidirectional 2.101 - if sequence not in column[any_sequence].links: 2.102 + if sequence not in column[any_sequence].links: 2.103 column[any_sequence].links.append(sequence) 2.104 if "links" not in column[sequence].__dict__: 2.105 column[sequence].links=[any_sequence] 2.106 @@ -304,8 +344,8 @@ 2.107 #print index_of_column, [gblock.columns for gblock in blocks_in_this_column] 2.108 if alignment.columns.index(column) == len(alignment.columns)-1: output+=blocks_in_this_column 2.109 return output 2.110 - 2.111 - 2.112 + 2.113 + 2.114 def find_connected_component(sequences, column): 2.115 """output is a list of sequences""" 2.116 #find one sequence that has blocks 2.117 @@ -326,7 +366,7 @@ 2.118 if another_sequence not in queue and another_sequence not in output and another_sequence in sequences: 2.119 queue.append(another_sequence) 2.120 return output 2.121 - 2.122 + 2.123 2.124 def delete_links(alignment): 2.125 for sequence in alignment.sequences: 2.126 @@ -334,7 +374,7 @@ 2.127 if "links" in monomer.__dict__: 2.128 del monomer.links 2.129 2.130 -#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147] 2.131 +#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147] 2.132 2.133 def create_monomer_homology(alignment): 2.134 """returns MonomerHomology object, given alignment 2.135 @@ -380,8 +420,16 @@ 2.136 sys.exit() 2.137 #creating markups 2.138 for sequence in alignment.sequences: 2.139 - sequence.add_markup('index') 2.140 - aim = alignment.add_markup('index') 2.141 + if 'markups' in sequence.__dict__: 2.142 + if 'index' in sequence.markups: 2.143 + pass 2.144 + else: 2.145 + sequence.add_markup('index') 2.146 + if 'markups' in alignment.__dict__: 2.147 + if 'index' in alignment.markups: 2.148 + aim = alignment.markups['index'] 2.149 + else: 2.150 + aim = alignment.add_markup('index') 2.151 #inferring classes_of_equivalence = homologous monomers = connected_components from links 2.152 class_number = 0 2.153 for column in alignment.columns: 2.154 @@ -408,4 +456,27 @@ 2.155 2.156 2.157 if __name__== '__main__': 2.158 - main(open(sys.argv[1])) 2.159 + usage = "usage: %prog [options] arg" 2.160 + parser = OptionParser(usage) 2.161 + parser.add_option("-c", "--classes", dest="output_classes", help="Name of optional output file with homology classes") 2.162 + (options, args) = parser.parse_args() 2.163 + input_file = open(args[0]) 2.164 + blocks = main(input_file) 2.165 + if blocks == []: 2.166 + sys.exit() 2.167 + aim = blocks[0].alignment.add_markup('index') #create markup 2.168 + file_content="" 2.169 + for index, block in enumerate(blocks): 2.170 + for sequence in block.sequences: 2.171 + file_content+=str(str(args[1]))+"\t"+str(index)+"\t"+str(sequence.name)+"\t" 2.172 + buffer="" 2.173 + for column in block.columns: 2.174 + buffer+=str(aim[column])+" " 2.175 + if (len(buffer)!=0): buffer=buffer[:-1] 2.176 + file_content+=buffer+"\n" 2.177 + 2.178 + output_file = open(args[1], 'w') 2.179 + output_file.write(file_content) 2.180 + 2.181 + if options.output_classes: 2.182 + create_file_with_monomer_homology(blocks[0].alignment, options.output_classes)