allpy
view lib/align.py @ 98:3d40d7b9a123
lib::project::muscle_align
project::save_fasta
sequence::operator==
fixes
test -- alignment done
author | boris <bnagaev@gmail.com> |
---|---|
date | Wed, 20 Oct 2010 23:33:29 +0400 |
parents | |
children |
line source
1 """
2 Usage:
4 seq_in=[]
5 seq_in.append("SSNAKIDQLSSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRLDNM")
6 seq_in.append("NAKADQASSDAQTANAKADQASNDANAARSDAQAAKDDAARANQRADNAA")
7 l=AlignmentSeq(seq_in)
8 for t in l.new_sequences:
9 print t
11 Why not to reimplement it?
12 Lava flow
13 http://www.insidecpp.ru/antipatterns/lava_flow/
14 """
21 """
22 sequences are strings
24 new_sequences -- aligned sequences
25 connections -- list of dicts like { [new_atom_id] => old_atom_id }
26 common -- list of [list_of_letters, list_of_chain_numbers]
27 """
39 @staticmethod
41 """
42 returns cost of aligning of aminoacids a1 and a2
43 """
51 @staticmethod
53 """
54 returns penalty of appending (to the right end) one more gap
55 gaps_count -- real number of gaps
56 """
62 # TO BE DONE
64 #def unite(self, chainN):
65 #"""
66 #alignment list creation
67 #chainN - chain number
68 #"""
69 #str1 = self.old_sequences[chainN]
70 #len1 = len(str1)
72 #if not self.common:
73 #i = 0
74 #while i < len1:
75 #aminoacids = [str1[i]]
76 #chains = [chainN]
77 #self.common.append([aminoacids,chains])
78 #i += 1
79 #return
81 #len2 = len(self.common)
83 #d = []
84 #tip_from = []
85 #already_gaps = []
87 #for i in xrange(len1 + 1):
88 #d.append([])
89 #already_gaps.append([])
90 #tip_from.append([])
91 #for j in xrange(len2 + 1):
92 #d[i].append(0)
93 #already_gaps[i].append([0, 0])
94 #tip_from[i].append(0)
96 #for i in xrange(1, len1 + 1):
97 #for j in xrange(1, len2 + 1):
99 #costs = []
100 #for A in self.common[j - 1][0]:
101 #costs.append(self.cost(str1[i - 1], A))
102 #cost = max(costs)
104 #insertion = d[i - 1][j]
105 #if j != len2: # non-end gap
106 #insertion += self.gap_cost(already_gaps[i - 1][j][1])
108 #deletion = d[i][j - 1]
109 #if i != len1: # non-end gap
110 #deletion += self.gap_cost(already_gaps[i][j - 1][0])
112 #substitution = d[i - 1][j - 1] + cost
113 #max_way = max(insertion, deletion, substitution)
114 #d[i][j] = max_way
116 #if max_way == substitution:
117 #tip = 3
118 #if max_way == deletion:
119 #tip = 2
120 #if max_way == insertion:
121 #tip = 1
123 #if tip == 1: # insertion
124 #already_gaps[i][j]=[0, (already_gaps[i-1][j][1]+1) ]
125 #if tip == 2: # deletion
126 #already_gaps[i][j]=[ (already_gaps[i][j-1][0]+1), 0 ]
127 #if tip == 3: # substitution
128 #already_gaps[i][j]=[ 0, 0 ]
130 #tip_from[i][j] = tip
132 #i = len1
133 #j = len2
135 #common = []
137 #while i > 0 or j > 0:
138 #tip = tip_from[i][j]
140 #if tip == 1 or j == 0 and i > 0:
142 #aminoacids = [(str1[i-1])]
143 #chains = [chainN]
145 #common.append([aminoacids, chains])
147 #i -= 1
151 #if tip==2 or (i==0 and j>0):
153 #common.append(self.common[j-1])
154 #j-=1
157 #if (tip==3):
159 #chains=self.common[j-1][1]
160 #chains.append(chainN)
162 #aminoacids=self.common[j-1][0]
164 #if (not aminoacids.count(str1[i-1])):
165 #aminoacids.append(str1[i-1])
167 #common.append([aminoacids,chains])
169 #i-=1
170 #j-=1
174 #common.reverse()
176 #self.common=common
200 #def lining(self,chainN):
203 #"""
204 #????? ??????? ????? ??????????? ??????????????????
205 #? self.new_sequences
207 #chainN - ????? ????
208 #"""
210 #str1=self.old_sequences[chainN]
211 #len1=len(str1)
213 #len2=len(self.common)
216 #new_seq=''
217 #position_in_old=0
219 #for common_1 in self.common:
220 #if (common_1[1].count(chainN)):
221 #new_seq = new_seq + str1[position_in_old]
222 #position_in_old += 1
223 #else:
224 #new_seq = new_seq + '-'
226 #self.new_sequences.append(new_seq)