allpy
changeset 1050:00d6145cba0f 1.4.2
Changes in blocks_finder transferred from default to 1.4.2.
author | Boris Burkov <BurkovBA@gmail.com> |
---|---|
date | Mon, 26 Mar 2012 22:08:08 +0400 |
parents | 73579ca7c1b5 |
children | 027f0f2f5417 |
files | sequence_based_blocks_search/blocks_finder.py |
diffstat | 1 files changed, 84 insertions(+), 13 deletions(-) [+] |
line diff
1.1 --- a/sequence_based_blocks_search/blocks_finder.py Fri Mar 23 01:34:48 2012 +0400 1.2 +++ b/sequence_based_blocks_search/blocks_finder.py Mon Mar 26 22:08:08 2012 +0400 1.3 @@ -4,10 +4,47 @@ 1.4 sys.path.append("../") 1.5 from allpy import protein 1.6 from allpy.homology import * 1.7 +from allpy import data 1.8 +from allpy import base 1.9 import copy 1.10 import math 1.11 from functional_groups import functional_groups 1.12 from allpy.util import * 1.13 +from optparse import OptionParser 1.14 + 1.15 + 1.16 +class SbbsProteinMonomer(protein.Monomer): 1.17 + def __eq__(self, other): 1.18 + return self is other 1.19 + 1.20 + def __ne__(self, other): 1.21 + return not self is other 1.22 + 1.23 +SbbsProteinMonomer._initialize(data.codes.protein) 1.24 + 1.25 +class SbbsProteinAlignment(protein.Alignment): 1.26 + def rows_as_strings(self, gaps='-'): 1.27 + rows = [] 1.28 + for sequence in self.sequences: 1.29 + string = "" 1.30 + for column in self.columns: 1.31 + if sequence in column: 1.32 + if hasattr(column[sequence], "input_code1"): 1.33 + string += column[sequence].input_code1 1.34 + else: 1.35 + string += column[sequence].code1 1.36 + else: 1.37 + string += gap 1.38 + string = util.UserString(string) 1.39 + string.sequence = sequence 1.40 + rows.append(string) 1.41 + return rows 1.42 + 1.43 +class SbbsProteinColumn(base.Column): 1.44 + def __eq__(self, other): 1.45 + return self is other 1.46 + 1.47 +vars(base)['Column'] = SbbsProteinColumn 1.48 1.49 class Preblock(object): 1.50 """Attributes: 1.51 @@ -51,7 +88,10 @@ 1.52 1.53 1.54 def main(file): 1.55 - alignment = protein.Alignment().append_file(file) 1.56 + try: 1.57 + alignment = SbbsProteinAlignment().append_file(file) 1.58 + except Exception as E: 1.59 + raise Exception("Failed to read alignment\nCheck alignment format") 1.60 groups_content = find_groups_content(alignment, functional_groups) 1.61 find_links(groups_content, alignment) 1.62 blocks = mark_blocks(alignment) 1.63 @@ -88,7 +128,7 @@ 1.64 addition_to_candidate_preblocks.append(Preblock(index_of_column, index_of_column, copy.copy(groups_content[column][group_name]), functional_groups[group_name]["70%"], functional_groups[group_name]["100%"], 0.0, alignment)) 1.65 except KeyError as ke: 1.66 print "Found non-canonical aminoacid: %s"%(ke) 1.67 - #create new preblocks as combinations of the old ones 1.68 + #create new preblocks as combinations of the old ones 1.69 try: 1.70 for preblock in candidate_preblocks: 1.71 preblock_extensions = [] 1.72 @@ -117,7 +157,7 @@ 1.73 overlapping_preblocks=[] 1.74 for preblock in candidate_preblocks: 1.75 if preblock.alignment.columns[preblock.last_column_index] is column and preblock.calculate_score() > calculate_threshold(len(preblock.sequences), len(alignment.sequences), functional_groups): 1.76 - create_links(preblock, alignment) #links are not cliques, just connected components 1.77 + create_links(preblock, alignment) #links are not cliques, just connected components 1.78 overlapping_preblocks+=accept_and_return_overlapping_preblocks_before(preblock, alignment, candidate_preblocks) 1.79 if preblock not in overlapping_preblocks: new_candidate_preblocks.append(preblock) 1.80 candidate_preblocks = new_candidate_preblocks 1.81 @@ -218,7 +258,7 @@ 1.82 inconserved positions, create the transition matrix, calculate 1.83 its eigenvectors and eigenvalues, round the weights, find coefficient 1.84 of [1,0,0,...] decomposition in the basis of eigenvectors. 1.85 - 1.86 + 1.87 1.88 """ 1.89 1.90 @@ -244,13 +284,13 @@ 1.91 # column[iterated_sequence].links=[sequence] 1.92 # else: 1.93 # if sequence not in column[iterated_sequence].links: column[iterated_sequence].links.append(sequence) 1.94 - 1.95 + 1.96 if "links" not in column[any_sequence].__dict__: 1.97 column[any_sequence].links=[] 1.98 remaining_sequences = copy.copy(preblock.sequences) 1.99 remaining_sequences.remove(any_sequence) 1.100 for sequence in preblock.sequences:#links are bidirectional 1.101 - if sequence not in column[any_sequence].links: 1.102 + if sequence not in column[any_sequence].links: 1.103 column[any_sequence].links.append(sequence) 1.104 if "links" not in column[sequence].__dict__: 1.105 column[sequence].links=[any_sequence] 1.106 @@ -304,8 +344,8 @@ 1.107 #print index_of_column, [gblock.columns for gblock in blocks_in_this_column] 1.108 if alignment.columns.index(column) == len(alignment.columns)-1: output+=blocks_in_this_column 1.109 return output 1.110 - 1.111 - 1.112 + 1.113 + 1.114 def find_connected_component(sequences, column): 1.115 """output is a list of sequences""" 1.116 #find one sequence that has blocks 1.117 @@ -326,7 +366,7 @@ 1.118 if another_sequence not in queue and another_sequence not in output and another_sequence in sequences: 1.119 queue.append(another_sequence) 1.120 return output 1.121 - 1.122 + 1.123 1.124 def delete_links(alignment): 1.125 for sequence in alignment.sequences: 1.126 @@ -334,7 +374,7 @@ 1.127 if "links" in monomer.__dict__: 1.128 del monomer.links 1.129 1.130 -#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147] 1.131 +#[gblock for gblock in blocks_in_previous_column for gcolumn in gblock.columns if alignment.columns.index(gcolumn)==147] 1.132 1.133 def create_monomer_homology(alignment): 1.134 """returns MonomerHomology object, given alignment 1.135 @@ -380,8 +420,16 @@ 1.136 sys.exit() 1.137 #creating markups 1.138 for sequence in alignment.sequences: 1.139 - sequence.add_markup('index') 1.140 - aim = alignment.add_markup('index') 1.141 + if 'markups' in sequence.__dict__: 1.142 + if 'index' in sequence.markups: 1.143 + pass 1.144 + else: 1.145 + sequence.add_markup('index') 1.146 + if 'markups' in alignment.__dict__: 1.147 + if 'index' in alignment.markups: 1.148 + aim = alignment.markups['index'] 1.149 + else: 1.150 + aim = alignment.add_markup('index') 1.151 #inferring classes_of_equivalence = homologous monomers = connected_components from links 1.152 class_number = 0 1.153 for column in alignment.columns: 1.154 @@ -408,4 +456,27 @@ 1.155 1.156 1.157 if __name__== '__main__': 1.158 - main(open(sys.argv[1])) 1.159 + usage = "usage: %prog [options] arg" 1.160 + parser = OptionParser(usage) 1.161 + parser.add_option("-c", "--classes", dest="output_classes", help="Name of optional output file with homology classes") 1.162 + (options, args) = parser.parse_args() 1.163 + input_file = open(args[0]) 1.164 + blocks = main(input_file) 1.165 + if blocks == []: 1.166 + sys.exit() 1.167 + aim = blocks[0].alignment.add_markup('index') #create markup 1.168 + file_content="" 1.169 + for index, block in enumerate(blocks): 1.170 + for sequence in block.sequences: 1.171 + file_content+=str(str(args[1]))+"\t"+str(index)+"\t"+str(sequence.name)+"\t" 1.172 + buffer="" 1.173 + for column in block.columns: 1.174 + buffer+=str(aim[column])+" " 1.175 + if (len(buffer)!=0): buffer=buffer[:-1] 1.176 + file_content+=buffer+"\n" 1.177 + 1.178 + output_file = open(args[1], 'w') 1.179 + output_file.write(file_content) 1.180 + 1.181 + if options.output_classes: 1.182 + create_file_with_monomer_homology(blocks[0].alignment, options.output_classes)