Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/e1346615e468
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 01:12:52 2012
Кодировка:
allpy: e1346615e468

allpy

changeset 752:e1346615e468

Methods to construct homology (without writing) added
author Andrei <aba@belozersky.msu.ru>
date Mon, 11 Jul 2011 21:55:41 +0400
parents 9032fd191b1c
children e460ca5a071f a46be1f6122f
files allpy/homology.py
diffstat 1 files changed, 108 insertions(+), 36 deletions(-) [+]
line diff
     1.1 --- a/allpy/homology.py	Mon Jul 11 18:27:37 2011 +0400
     1.2 +++ b/allpy/homology.py	Mon Jul 11 21:55:41 2011 +0400
     1.3 @@ -2,42 +2,42 @@
     1.4  from allpy import markups
     1.5  
     1.6  class  MonomerHomology(object):
     1.7 -    """ Essentially, alignment is a set of monomer homology classes
     1.8 -    Object of type MonomerHomology desribes an alignment  in terms of monomer homology 
     1.9 -    Currently, identifies of monomers and monomer homology classes are used 
    1.10 -
    1.11 +    """ Essentially, alignment is a set of monomer homology classes.
    1.12 +    Object of type MonomerHomology describes an alignment in terms of monomer homology.
    1.13      Each monomer of any sequence from an alignment must be contained in a homology class
    1.14  
    1.15 -    Homology of nonequal monomers of the same sequence is allowed (formally), 
    1.16 +    Homology of nonequal monomers of the same sequence is allowed (formally) in attributes, 
    1.17          but there is no implemented methods to deal with 
    1.18  
    1.19      DATA:
    1.20          monomer_ids = {monomer_id:class_id}
    1.21 -        classes     = {class_id:[monomer_id1, monomer_id2]}
    1.22 -            monomer_id = (sequence_name, monomer_number) 
    1.23 +            monomer_id = (sequence_name, monomer_number)
    1.24 +                monomer_number is a number of the monomer in the sequence
    1.25              class_id = string 
    1.26 -        columns = {class_id:column_number} optional
    1.27 -
    1.28 +        classes     = {class_id:[monomer_id1, monomer_id2,...], ...}
    1.29 +        columns = {class_id:column_number}
    1.30          blocks_data = [block_data, ...]
    1.31          block_data = (sequence_names,class_ids)
    1.32              sequence_names = set( [sequence_1_name,...] )
    1.33              class_ids = = set( [class_1_id, ...] )
    1.34  
    1.35 -     BLOCK is assigned to an alignment (currently) 
    1.36 -        Here block is not a block of an alignment, but a set of data to create such block
    1.37 -        On the other hand, block consists of class_ids (not columns) to have possibility to get monomer_id
    1.38 -        Thus, block reaarengement must be accomplished with classes reassignments!
    1.39 -        Not very good, but currently is made as above described 
    1.40 +     block_data contains only object identifiers, so no alignment object is needed.
    1.41 +     block_data may be converted to blocks of an alignment by the appropriate method.
    1.42  
    1.43      METHODS:
    1.44 -       + .read(file_name) 
    1.45 -       + .write_monomer(file,monomer_id,monomer_homology_class_id)
    1.46 -       + .write_class(file,list_of_monomer_ids,monomer_homology_class_id)
    1.47 -       + .write_block(file, block)
    1.48 +       + .read(file_name, columns = True)
    1.49 +               Reads file of monomer homology format into MonomerHomology object.
    1.50 +       + .write_monomer(file,monomer_id,monomer_homology_class_id, column_number = column_number)   STATIC
    1.51 +       + .write_class(file,list_of_monomer_ids,monomer_homology_class_id,column_number = column_number)  STATIC
    1.52 +       + .write_block(file, alignment, block)
    1.53 +       + .add_monomer(monomer_id,class_id,column_number)
    1.54 +       + .add_class(monomer_ids,class_id, column_number)
    1.55 +       + .add_block(alignment, block)
    1.56         + .compare_with(class)
    1.57 +           + .two_vs_one(classes_one, classes_two)  STATIC
    1.58         + .highest_blocks()
    1.59         + .alignment_blocks(alignment)
    1.60 -         .case_homology
    1.61 +       + .case_homology(in_file_name,out_file_name, case)  STATIC
    1.62      """
    1.63      def __init__(self, next_class_id = 1):
    1.64           self.classes = {}
    1.65 @@ -46,6 +46,7 @@
    1.66           self.blocks_data = []
    1.67           self.next_class_id = next_class_id
    1.68  
    1.69 +################################################################
    1.70  
    1.71      def read(self, file_name, columns = False):
    1.72          """
    1.73 @@ -106,6 +107,8 @@
    1.74          f.close()
    1.75          return monomers_count 
    1.76  
    1.77 +################################################################
    1.78 +
    1.79      @staticmethod
    1.80      def  two_vs_one(classes_one, classes_two):
    1.81          """ Computes weighted average number of mistakes of classes_two with respect to classes_one 
    1.82 @@ -170,8 +173,44 @@
    1.83  
    1.84          return (mistakes_two_vs_one, mistakes_one_vs_two)
    1.85  
    1.86 +################################################################
    1.87 +
    1.88      @staticmethod
    1.89 -    def write_monomer(file,monomer_id,class_id, column_number = False):
    1.90 +    def write_class(file,monomer_ids,class_id, column_number = False):
    1.91 +        """ Writes list of monomer_ids forming one homology_class
    1.92 +        """
    1.93 +        for monomer_id in monomer_ids:
    1.94 +            MonomerHomology.write_monomer(file,monomer_id,class_id, column_number = column_number)
    1.95 +
    1.96 +
    1.97 +    def add_class(self,monomer_ids,class_id, column_number):
    1.98 +        """ Adds list of monomer_ids forming one homology_class
    1.99 +        """
   1.100 +        for monomer_id in monomer_ids:
   1.101 +            self.add_monomer(monomer_id,class_id, column_number)
   1.102 +
   1.103 +
   1.104 +
   1.105 +
   1.106 +#################################################################      
   1.107 +
   1.108 +    def add_monomer(self,monomer_id,class_id,column_number):
   1.109 +        """ Add one monomer to Homology 
   1.110 +        """
   1.111 +        if len(monomer_id) != 2:
   1.112 +            raise Exception("wrong parameters given for Monomer_homology.write_monomer: len(monomer_id) is not 2!") 
   1.113 +            exit()
   1.114 +        self.monomer_ids[monomer_id] = class_id
   1.115 +
   1.116 +        if class_id in self.classes:
   1.117 +             self.classes[class_id].append(monomer_id) 
   1.118 +        else:
   1.119 +             self.classes[class_id] = [monomer_id]
   1.120 +
   1.121 +        self.columns[class_id] = column_number
   1.122 +
   1.123 +    @staticmethod
   1.124 +    def write_monomer(file, monomer_id, class_id, column_number = False):
   1.125          """ Write a line "class_id sequence_id  monome number \n"  into file
   1.126          """
   1.127          if len(monomer_id) != 2:
   1.128 @@ -186,17 +225,41 @@
   1.129          except:
   1.130              raise Exception("Failed to write monomer into file!") 
   1.131              exit()
   1.132 -        return None
   1.133  
   1.134 -    @staticmethod
   1.135 -    def write_class(file,monomer_ids,class_id, column_number = False):
   1.136 -        """ Writes list of monomer_ids forming one homology_class
   1.137 +
   1.138 +#################################################################      
   1.139 +
   1.140 +    def add_block(self,alignment, block):
   1.141 +        """ Add classes of plus-block to Homology
   1.142          """
   1.143 -        for monomer_id in monomer_ids:
   1.144 -            MonomerHomology.write_monomer(file,monomer_id,class_id, column_number = column_number)
   1.145 -     
   1.146 -    #######################3#
   1.147 -    def write_block(self,file,block,markedup = False):    
   1.148 +        for column in block.columns:
   1.149 +            if len(column) == 0:
   1.150 +                continue
   1.151 +
   1.152 +            try:
   1.153 +                column_number = alignment.markups['number'][column]
   1.154 +            except:
   1.155 +                raise Exception("Alignment column has no number! Alignment must be marked up by numbers before .add_block")   
   1.156 +                exit()
   1.157 +
   1.158 +            for sequence in block.sequences:
   1.159 +                if sequence in column:
   1.160 +
   1.161 +                    try: 
   1.162 +                        monomer_id = (sequence.name, column[sequence].number) 
   1.163 +                    except:
   1.164 +                        raise Exception("Monomer has no number! Sequences must be marked up by numbers before .writ_block") 
   1.165 +                        exit()
   1.166 +
   1.167 +                    self.add_monomer(monomer_id,self.next_class_id, column_number)
   1.168 +
   1.169 +            self.next_class_id +=1
   1.170 +
   1.171 +
   1.172 +         
   1.173 +
   1.174 +
   1.175 +    def write_block(self,file,alignment, block, markedup = True):    
   1.176          """ Writes each block column into as one homology class
   1.177          WARRNINGS: 
   1.178              (1) method works in object of class MonomerHomology
   1.179 @@ -212,7 +275,7 @@
   1.180                  continue
   1.181  
   1.182              if markedup:
   1.183 -                column_number = block.markups['number'][column]
   1.184 +                column_number = alignment.markups['number'][column]
   1.185              else:
   1.186                  column_number = False
   1.187  
   1.188 @@ -229,8 +292,8 @@
   1.189  
   1.190              self.next_class_id +=1
   1.191  
   1.192 - 
   1.193 -    #######################3#
   1.194 +################################################################# 
   1.195 +
   1.196      def _sequences_of_class(self,monomer_ids):
   1.197          """  RETURN set of sequences from a list of monomer_ids
   1.198          """
   1.199 @@ -297,6 +360,8 @@
   1.200  
   1.201          return self.blocks_data 
   1.202  
   1.203 +################################################################
   1.204 +
   1.205      def alignment_blocks(self, alignment):
   1.206          """ From homology.blocks creates a list of alignment blocks (accessible for visualization) 
   1.207  
   1.208 @@ -346,24 +411,27 @@
   1.209  
   1.210              block = protein.Block.from_alignment(alignment, sequences = block_sequences, columns = block_columns)
   1.211              blocks.append(block)
   1.212 -
   1.213          return blocks
   1.214  
   1.215 +#################################################################
   1.216      @staticmethod
   1.217      def case_homology(in_file_name,out_file_name, case):
   1.218 -        """ Makes homology file from  case sensitive input alignment
   1.219 +        """ Makes homology file from input alignment
   1.220 +        case = True => uppercase letters in a column form one class; each lowercase letter gets its own class
   1.221 +        case = False => all letters in a column are in one class
   1.222 +
   1.223          RETURN number of classes
   1.224          """
   1.225          try: 
   1.226              f = open(in_file_name) 
   1.227          except:
   1.228 -            raise Exception("Failed to open file %s" % fasta_file_name)
   1.229 +            raise Exception("Failed to open file %s" % in_file_name)
   1.230              exit()
   1.231          try:
   1.232              alignment = protein.Alignment().append_file(f) 
   1.233              f.close()
   1.234          except:
   1.235 -            raise Exception("Failed to cteate alignment from file %s!" % fasta_file_name)
   1.236 +            raise Exception("Failed to create alignment from file %s!" % in_file_name)
   1.237              exit()
   1.238          try:  
   1.239              g = open(out_file_name, 'w')
   1.240 @@ -406,4 +474,8 @@
   1.241          g.close()    
   1.242          return next_class_id - 1
   1.243  
   1.244 +################################################################
   1.245  
   1.246 +
   1.247 +
   1.248 +