allpy
changeset 4:0905a39738f2
project.py: EXTREMELY ugly method to calculate the identity percentage of alignment columns for colouring has been added. It will be improved later.
author | grishin@gorilla |
---|---|
date | Wed, 09 Jun 2010 14:43:04 +0400 |
parents | 4afd86909244 |
children | 76816df24fe2 |
files | lib/project.py |
diffstat | 1 files changed, 40 insertions(+), 1 deletions(-) [+] |
line diff
1.1 --- a/lib/project.py Wed Jun 09 14:40:42 2010 +0400 1.2 +++ b/lib/project.py Wed Jun 09 14:43:04 2010 +0400 1.3 @@ -9,6 +9,7 @@ 1.4 1.5 import sequence 1.6 import monomer 1.7 +import allpy_data 1.8 1.9 class Project(object): 1.10 """ 1.11 @@ -44,6 +45,45 @@ 1.12 1.13 def __len__(self): 1.14 return max([len(line) for line in self.alignment.values()]) 1.15 + 1.16 + def calc_identity(self): 1.17 + """ Calculate the identity of alignment positions for colouring. 1.18 + 1.19 + For every (row, column) in alignment the percentage of the exactly 1.20 + same residue in the same column in the alignment is calculated. 1.21 + The data structure is just like the Project.alignment, but instead of 1.22 + monomers it contains float percentages. 1.23 + """ 1.24 + # Oh, God, that's awful! Absolutely not understandable. 1.25 + # First, calculate percentages of amino acids in every column 1.26 + contribution = 1.0 / len(self.sequences) 1.27 + all_columns = [] 1.28 + for position in range(len(self)): 1.29 + column_percentage = {} 1.30 + for seq in self.alignment: 1.31 + if self.alignment[seq][position] is not None: 1.32 + aa = self.alignment[seq][position].code 1.33 + else: 1.34 + aa = None 1.35 + if aa in allpy_data.amino_acids: 1.36 + if aa in column_percentage.keys(): 1.37 + column_percentage[aa] += contribution 1.38 + else: 1.39 + column_percentage[aa] = contribution 1.40 + all_columns.append(column_percentage) 1.41 + # Second, map these percentages onto the alignment 1.42 + self.identity_percentages = {} 1.43 + for seq in self.sequences: 1.44 + self.identity_percentages[seq] = [] 1.45 + for seq in self.identity_percentages: 1.46 + line = self.identity_percentages[seq] 1.47 + for position in range(len(self)): 1.48 + if self.alignment[seq][position] is not None: 1.49 + aa = self.alignment[seq][position].code 1.50 + else: 1.51 + aa = None 1.52 + line.append(all_columns[position].get(aa)) 1.53 + return self.identity_percentages 1.54 1.55 @staticmethod 1.56 def get_from_fasta(file): 
1.57 @@ -87,4 +127,3 @@ 1.58 sequences.append(sequence.Sequence(name,None,monomers)) 1.59 alignment[sequences[-1]]=alignment_list 1.60 return sequences,alignment 1.61 -