allpy
changeset 752:e1346615e468
Methods to construct homology (without writing) added
author | Andrei <aba@belozersky.msu.ru> |
---|---|
date | Mon, 11 Jul 2011 21:55:41 +0400 |
parents | 9032fd191b1c |
children | e460ca5a071f a46be1f6122f |
files | allpy/homology.py |
diffstat | 1 files changed, 108 insertions(+), 36 deletions(-) [+] |
line diff
1.1 --- a/allpy/homology.py Mon Jul 11 18:27:37 2011 +0400 1.2 +++ b/allpy/homology.py Mon Jul 11 21:55:41 2011 +0400 1.3 @@ -2,42 +2,42 @@ 1.4 from allpy import markups 1.5 1.6 class MonomerHomology(object): 1.7 - """ Essentially, alignment is a set of monomer homology classes 1.8 - Object of type MonomerHomology desribes an alignment in terms of monomer homology 1.9 - Currently, identifies of monomers and monomer homology classes are used 1.10 - 1.11 + """ Essentially, alignment is a set of monomer homology classes. 1.12 + Object of type MonomerHomology desribes an alignment in terms of monomer homology. 1.13 Each monomer of any sequence from an alignment must be contained in a homology class 1.14 1.15 - Homology of nonequal monomers of the same sequence is allowed (formally), 1.16 + Homology of nonequal monomers of the same sequence is allowed (formally) in attributes, 1.17 but there is no implemented methods to deal with 1.18 1.19 DATA: 1.20 monomer_ids = {monomer_id:class_id} 1.21 - classes = {class_id:[monomer_id1, monomer_id2]} 1.22 - monomer_id = (sequence_name, monomer_number) 1.23 + monomer_id = (sequence_name, monomer_number) 1.24 + monomer_number is a number of the monomer in the sequence 1.25 class_id = string 1.26 - columns = {class_id:column_number} optional 1.27 - 1.28 + classes = {class_id:[monomer_id1, monomer_id2,...], ...} 1.29 + columns = {class_id:column_number} 1.30 blocks_data = [block_data, ...] 1.31 block_data = (sequence_names,class_ids) 1.32 sequence_names = set( [sequence_1_name,...] ) 1.33 class_ids = = set( [class_1_id, ...] ) 1.34 1.35 - BLOCK is assigned to an alignment (currently) 1.36 - Here block is not a block of an alignment, but a set of data to create such block 1.37 - On the other hand, block consists of class_ids (not columns) to have possibility to get monomer_id 1.38 - Thus, block reaarengement must be accomplished with classes reassignments! 1.39 - Not very good, but currently is made as above described 1.40 + block_data contain identifiers of objects only, alignment object is not needed. 1.41 + block_data may be converted to bloks of an alignment by appropriate method 1.42 1.43 METHODS: 1.44 - + .read(file_name) 1.45 - + .write_monomer(file,monomer_id,monomer_homology_class_id) 1.46 - + .write_class(file,list_of_monomer_ids,monomer_homology_class_id) 1.47 - + .write_block(file, block) 1.48 + + .read(file_name, columns = True) 1.49 + Reads file of monomer homology format into MonomerHomology object. 1.50 + + .write_monomer(file,monomer_id,monomer_homology_class_id, column_number = column_number) STATIC 1.51 + + .write_class(file,list_of_monomer_ids,monomer_homology_class_id,column_number = column_number) STATIC 1.52 + + .write_block(file, alignment, block) 1.53 + + .add_monomer(monomer_id,class_id,column_number) 1.54 + + .add_class(monomer_ids,class_id, column_number) 1.55 + + .add_block(alignment, block) 1.56 + .compare_with(class) 1.57 + + .two_vs_one(classes_one, classes_two) STATIC 1.58 + .highest_blocks() 1.59 + .alignment_blocks(alignment) 1.60 - .case_homology 1.61 + + .case_homology(in_file_name,out_file_name, case) STATIC 1.62 """ 1.63 def __init__(self, next_class_id = 1): 1.64 self.classes = {} 1.65 @@ -46,6 +46,7 @@ 1.66 self.blocks_data = [] 1.67 self.next_class_id = next_class_id 1.68 1.69 +################################################################ 1.70 1.71 def read(self, file_name, columns = False): 1.72 """ 1.73 @@ -106,6 +107,8 @@ 1.74 f.close() 1.75 return monomers_count 1.76 1.77 +################################################################ 1.78 + 1.79 @staticmethod 1.80 def two_vs_one(classes_one, classes_two): 1.81 """ Computes weighted average number of mistakes of classes_two with respect to classes_one 1.82 @@ -170,8 +173,44 @@ 1.83 1.84 return (mistakes_two_vs_one, mistakes_one_vs_two) 1.85 1.86 +################################################################ 1.87 + 1.88 @staticmethod 1.89 - def write_monomer(file,monomer_id,class_id, column_number = False): 1.90 + def write_class(file,monomer_ids,class_id, column_number = False): 1.91 + """ Writes list of monomer_ids forming one homology_class 1.92 + """ 1.93 + for monomer_id in monomer_ids: 1.94 + MonomerHomology.write_monomer(file,monomer_id,class_id, column_number = column_number) 1.95 + 1.96 + 1.97 + def add_class(self,monomer_ids,class_id, column_number): 1.98 + """ Adds list of monomer_ids forming one homology_class 1.99 + """ 1.100 + for monomer_id in monomer_ids: 1.101 + self.add_monomer(monomer_id,class_id, column_number) 1.102 + 1.103 + 1.104 + 1.105 + 1.106 +################################################################# 1.107 + 1.108 + def add_monomer(self,monomer_id,class_id,column_number): 1.109 + """ Add one monomer to Homology 1.110 + """ 1.111 + if len(monomer_id) != 2: 1.112 + raise Exception("wrong parameters given for Monomer_homology.write_monomer: len(monomer_id) is not 2!") 1.113 + exit() 1.114 + self.monomer_ids[monomer_id] = class_id 1.115 + 1.116 + if class_id in self.classes: 1.117 + self.classes[class_id].append(monomer_id) 1.118 + else: 1.119 + self.classes[class_id] = [monomer_id] 1.120 + 1.121 + self.columns[class_id] = column_number 1.122 + 1.123 + @staticmethod 1.124 + def write_monomer(file, monomer_id, class_id, column_number = False): 1.125 """ Write a line "class_id sequence_id monome number \n" into file 1.126 """ 1.127 if len(monomer_id) != 2: 1.128 @@ -186,17 +225,41 @@ 1.129 except: 1.130 raise Exception("Failed to write monomer into file!") 1.131 exit() 1.132 - return None 1.133 1.134 - @staticmethod 1.135 - def write_class(file,monomer_ids,class_id, column_number = False): 1.136 - """ Writes list of monomer_ids forming one homology_class 1.137 + 1.138 +################################################################# 1.139 + 1.140 + def add_block(self,alignment, block): 1.141 + """ Add classes of plus-block to Homology 1.142 """ 1.143 - for monomer_id in monomer_ids: 1.144 - MonomerHomology.write_monomer(file,monomer_id,class_id, column_number = column_number) 1.145 - 1.146 - #######################3# 1.147 - def write_block(self,file,block,markedup = False): 1.148 + for column in block.columns: 1.149 + if len(column) == 0: 1.150 + continue 1.151 + 1.152 + try: 1.153 + column_number = alignment.markups['number'][column] 1.154 + except: 1.155 + raise Exception("Alignment column has no number! Alignment must be marked up by numbers before .add_block") 1.156 + exit() 1.157 + 1.158 + for sequence in block.sequences: 1.159 + if sequence in column: 1.160 + 1.161 + try: 1.162 + monomer_id = (sequence.name, column[sequence].number) 1.163 + except: 1.164 + raise Exception("Monomer has no number! Sequences must be marked up by numbers before .writ_block") 1.165 + exit() 1.166 + 1.167 + self.add_monomer(monomer_id,self.next_class_id, column_number) 1.168 + 1.169 + self.next_class_id +=1 1.170 + 1.171 + 1.172 + 1.173 + 1.174 + 1.175 + def write_block(self,file,alignment, block, markedup = True): 1.176 """ Writes each block column into as one homology class 1.177 WARRNINGS: 1.178 (1) method works in object of class MonomerHomology 1.179 @@ -212,7 +275,7 @@ 1.180 continue 1.181 1.182 if markedup: 1.183 - column_number = block.markups['number'][column] 1.184 + column_number = alignment.markups['number'][column] 1.185 else: 1.186 column_number = False 1.187 1.188 @@ -229,8 +292,8 @@ 1.189 1.190 self.next_class_id +=1 1.191 1.192 - 1.193 - #######################3# 1.194 +################################################################# 1.195 + 1.196 def _sequences_of_class(self,monomer_ids): 1.197 """ RETURN set of sequences from a list of monomer_ids 1.198 """ 1.199 @@ -297,6 +360,8 @@ 1.200 1.201 return self.blocks_data 1.202 1.203 +################################################################ 1.204 + 1.205 def alignment_blocks(self, alignment): 1.206 """ From homology.blocks creates a list of alignment blocks (accessible for visualization) 1.207 1.208 @@ -346,24 +411,27 @@ 1.209 1.210 block = protein.Block.from_alignment(alignment, sequences = block_sequences, columns = block_columns) 1.211 blocks.append(block) 1.212 - 1.213 return blocks 1.214 1.215 +################################################################# 1.216 @staticmethod 1.217 def case_homology(in_file_name,out_file_name, case): 1.218 - """ Makes homology file from case sensitive input alignment 1.219 + """ Makes homology file from input alignment 1.220 + case = True => Upper letters in a column are in one class, each lower letter in separate class 1.221 + case = False => all letter in a column are in one class 1.222 + 1.223 RETURN number of classes 1.224 """ 1.225 try: 1.226 f = open(in_file_name) 1.227 except: 1.228 - raise Exception("Failed to open file %s" % fasta_file_name) 1.229 + raise Exception("Failed to open file %s" % in_file_name) 1.230 exit() 1.231 try: 1.232 alignment = protein.Alignment().append_file(f) 1.233 f.close() 1.234 except: 1.235 - raise Exception("Failed to cteate alignment from file %s!" % fasta_file_name) 1.236 + raise Exception("Failed to create alignment from file %s!" % in_file_name) 1.237 exit() 1.238 try: 1.239 g = open(out_file_name, 'w') 1.240 @@ -406,4 +474,8 @@ 1.241 g.close() 1.242 return next_class_id - 1 1.243 1.244 +################################################################ 1.245 1.246 + 1.247 + 1.248 +