allpy

changeset 4:0905a39738f2
project.py: EXTREMELY ugly method to calculate the identity percentage of alignment columns for colouring has been added. It will be improved later.
author: grishin@gorilla
date: Wed, 09 Jun 2010 14:43:04 +0400
parents: 4afd86909244
children: 76816df24fe2
files: lib/project.py
diffstat: 1 files changed, 40 insertions(+), 1 deletions(-) [+]
[-]

lib/project.py 41 lib/project.py 41
lib/project.py 41
     1.1 --- a/lib/project.py	Wed Jun 09 14:40:42 2010 +0400
     1.2 +++ b/lib/project.py	Wed Jun 09 14:43:04 2010 +0400
     1.3 @@ -9,6 +9,7 @@
     1.4  
     1.5  import sequence
     1.6  import monomer
     1.7 +import allpy_data
     1.8  
     1.9  class Project(object):
    1.10      """
    1.11 @@ -44,6 +45,45 @@
    1.12  
    1.13      def __len__(self):
    1.14          return max([len(line) for line in self.alignment.values()])
    1.15 +       
    1.16 +    def calc_identity(self):
    1.17 +        """ Calculate the identity of alignment positions for colouring.
    1.18 +
    1.19 +        For every (row, column) in the alignment, the percentage of exactly
    1.20 +        the same residue in the same column of the alignment is calculated.
    1.21 +        The data structure is just like Project.alignment, but instead of
    1.22 +        monomers it contains float percentages.
    1.23 +        """
    1.24 +        # Oh, God, that's awful! Absolutely not understandable.
    1.25 +        # First, calculate percentages of amino acids in every column
    1.26 +        contribution = 1.0 / len(self.sequences)
    1.27 +        all_columns = []
    1.28 +        for position in range(len(self)):
    1.29 +            column_percentage = {}
    1.30 +            for seq in self.alignment:
    1.31 +                if self.alignment[seq][position] is not None:
    1.32 +                    aa = self.alignment[seq][position].code
    1.33 +                else:
    1.34 +                    aa = None
    1.35 +                if aa in allpy_data.amino_acids:
    1.36 +                    if aa in column_percentage.keys():
    1.37 +                        column_percentage[aa] += contribution
    1.38 +                    else:
    1.39 +                        column_percentage[aa] = contribution
    1.40 +            all_columns.append(column_percentage)
    1.41 +        # Second, map these percentages onto the alignment
    1.42 +        self.identity_percentages = {}
    1.43 +        for seq in self.sequences:
    1.44 +            self.identity_percentages[seq] = []
    1.45 +        for seq in self.identity_percentages:
    1.46 +            line = self.identity_percentages[seq]
    1.47 +            for position in range(len(self)):
    1.48 +                if self.alignment[seq][position] is not None:
    1.49 +                    aa = self.alignment[seq][position].code
    1.50 +                else:
    1.51 +                    aa = None
    1.52 +                line.append(all_columns[position].get(aa))
    1.53 +        return self.identity_percentages
    1.54  
    1.55      @staticmethod
    1.56      def get_from_fasta(file):
    1.57 @@ -87,4 +127,3 @@
    1.58                  sequences.append(sequence.Sequence(name,None,monomers))
    1.59              alignment[sequences[-1]]=alignment_list
    1.60          return sequences,alignment
    1.61 -