Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/3d40d7b9a123/lib/align.py
Дата изменения: Unknown
Дата индексирования: Sat Mar 1 09:22:37 2014
Кодировка:
allpy: 3d40d7b9a123 lib/align.py

allpy

view lib/align.py @ 98:3d40d7b9a123

lib::project::muscle_align project::save_fasta sequence::operator== fixes test -- alignment done
author boris <bnagaev@gmail.com>
date Wed, 20 Oct 2010 23:33:29 +0400
parents
children
line source
1 """
2 Usage:
4 seq_in=[]
5 seq_in.append("SSNAKIDQLSSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRLDNM")
6 seq_in.append("NAKADQASSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRADNAA")
7 l=AlignmentSeq(seq_in)
8 for t in l.new_sequences:
9 print t
11 Why not to reimplement it?
12 Lava flow
13 http://www.insidecpp.ru/antipatterns/lava_flow/
14 """
16 from allpy_data.blossum62 import matrix, gaps
18 class AlignmentSeq(object):
20 def __init__(self, sequences):
21 """
22 sequences are strings
24 new_sequences -- aligned sequences
25 connections -- list of dicts like { [new_atom_id] => old_atom_id }
26 common -- list of [list_of_letters, list_of_chain_numbers]
27 """
28 self.old_sequences = []
29 self.new_sequences = []
30 self.connections = []
31 for seq in sequences:
32 self.old_sequences.append(seq.upper().replace('-','').replace(' ',''))
33 self.common=[]
34 for i in xrange(len(self.old_sequences)):
35 self.unite(i)
36 for i in xrange(len(self.old_sequences)):
37 self.lining(i)
39 @staticmethod
40 def cost(a1, a2):
41 """
42 returns cost of aligning of aminoacids a1 and a2
43 """
44 a1 = a1.upper()
45 a2 = a2.upper()
46 if a1 in matrix:
47 if a2 in matrix[a1]:
48 return matrix[a1][a2]
49 return gaps[0]
51 @staticmethod
52 def gap_cost(self, gaps_count):
53 """
54 returns penalty of appending (to the right end) one more gap
55 gaps_count -- real number of gaps
56 """
57 if gaps_count >= len(gaps):
58 return gaps[(len(gaps)-1)]
59 else:
60 return gaps[gaps_count]
62 # TO BE DONE
64 #def unite(self, chainN):
65 #"""
66 #alignment list creation
67 #chainN - chain number
68 #"""
69 #str1 = self.old_sequences[chainN]
70 #len1 = len(str1)
72 #if not self.common:
73 #i = 0
74 #while i < len1:
75 #aminoacids = [str1[i]]
76 #chains = [chainN]
77 #self.common.append([aminoacids,chains])
78 #i += 1
79 #return
81 #len2 = len(self.common)
83 #d = []
84 #tip_from = []
85 #already_gaps = []
87 #for i in xrange(len1 + 1):
88 #d.append([])
89 #already_gaps.append([])
90 #tip_from.append([])
91 #for j in xrange(len2 + 1):
92 #d[i].append(0)
93 #already_gaps[i].append([0, 0])
94 #tip_from[i].append(0)
96 #for i in xrange(1, len1 + 1):
97 #for j in xrange(1, len2 + 1):
99 #costs = []
100 #for A in self.common[j - 1][0]:
101 #costs.append(self.cost(str1[i - 1], A))
102 #cost = max(costs)
104 #insertion = d[i - 1][j]
105 #if j != len2: # non-end gap
106 #insertion += self.gap_cost(already_gaps[i - 1][j][1])
108 #deletion = d[i][j - 1]
109 #if i != len1: # non-end gap
110 #deletion += self.gap_cost(already_gaps[i][j - 1][0])
112 #substitution = d[i - 1][j - 1] + cost
113 #max_way = max(insertion, deletion, substitution)
114 #d[i][j] = max_way
116 #if max_way == substitution:
117 #tip = 3
118 #if max_way == deletion:
119 #tip = 2
120 #if max_way == insertion:
121 #tip = 1
123 #if tip == 1: # insertion
124 #already_gaps[i][j]=[0, (already_gaps[i-1][j][1]+1) ]
125 #if tip == 2: # deletion
126 #already_gaps[i][j]=[ (already_gaps[i][j-1][0]+1), 0 ]
127 #if tip == 3: # substitution
128 #already_gaps[i][j]=[ 0, 0 ]
130 #tip_from[i][j] = tip
132 #i = len1
133 #j = len2
135 #common = []
137 #while i > 0 or j > 0:
138 #tip = tip_from[i][j]
140 #if tip == 1 or j == 0 and i > 0:
142 #aminoacids = [(str1[i-1])]
143 #chains = [chainN]
145 #common.append([aminoacids, chains])
147 #i -= 1
151 #if tip==2 or (i==0 and j>0):
153 #common.append(self.common[j-1])
154 #j-=1
157 #if (tip==3):
159 #chains=self.common[j-1][1]
160 #chains.append(chainN)
162 #aminoacids=self.common[j-1][0]
164 #if (not aminoacids.count(str1[i-1])):
165 #aminoacids.append(str1[i-1])
167 #common.append([aminoacids,chains])
169 #i-=1
170 #j-=1
174 #common.reverse()
176 #self.common=common
200 #def lining(self,chainN):
203 #"""
204 #????? ??????? ????? ??????????? ??????????????????
205 #? self.new_sequences
207 #chainN - ????? ????
208 #"""
210 #str1=self.old_sequences[chainN]
211 #len1=len(str1)
213 #len2=len(self.common)
216 #new_seq=''
217 #position_in_old=0
219 #for common_1 in self.common:
220 #if (common_1[1].count(chainN)):
221 #new_seq = new_seq + str1[position_in_old]
222 #position_in_old += 1
223 #else:
224 #new_seq = new_seq + '-'
226 #self.new_sequences.append(new_seq)