rev |
line source |
bnagaev@240
|
1 import base |
me@353
|
2 import data.codes |
dendik@1100
|
3 from data.genetic_code import standard_dna_code |
me@280
|
4 |
dendik@382
|
5 import dna |
dendik@1100
|
6 import protein |
dendik@382
|
7 |
me@328
|
class Monomer(base.Monomer):
    """Nucleotide: the monomer kind that DNA sequences are built from."""

    # Tag identifying which kind of monomer this class describes.
    type = 'dna'
    # The `dna` module imported by this file; gives access to sibling classes.
    types = dna

    # Per-class lookup tables, empty at class creation.
    # NOTE(review): presumably filled by `_initialize` below, keyed by
    # full name, three-letter code and one-letter code -- confirm in `base`.
    by_name = {}
    by_code3 = {}
    by_code1 = {}


# Feed the DNA code table from `data.codes` to the class-level initializer.
Monomer._initialize(data.codes.dna)
bnagaev@240
|
16 |
bnagaev@240
|
class Sequence(base.Sequence):
    """DNA sequence with reverse-complement and translation helpers."""

    # The `dna` module imported by this file; used to reach sibling classes.
    types = dna

    def reverse_complemented(self):
        """Return a new sequence, reverse-complement to self.

        Name of the new sequence is the name of self with an apostrophe
        appended; description and source are copied from self.

        Monomers whose `code1` is not one of A, T, C, G are replaced
        with 'N' in the result.
        """
        complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
        result = self.types.Sequence(
            name=self.name + "'",
            description=self.description,
            source=self.source
        )
        for monomer in reversed(self):
            result.append_monomer(complement.get(monomer.code1, 'N'))
        return result

    def translated(self, code=None, name=None, description=None, source=None):
        """Return a new protein sequence translated from self.

        `code` is a dict of triplet of dna `code1`s -> aminoacid `name`.

        If `code` is not specified (or is empty), the standard genetic
        code is used.

        If `code` is specified, it may contain only the changed codons;
        it is laid over a copy of the standard genetic code.

        Class of proteins to use is `self.types.protein.Sequence`, you
        are free to replace it at will.

        Return new protein sequence where, unless the corresponding
        argument is given:

        * `name` is self.name with "_tr" appended
        * `description` is self.description with " translated" appended
        * `source` is the same as self.source

        Monomers are consumed three at a time; a trailing group of fewer
        than three monomers is ignored. A stop codon is not added to the
        result and must be the last codon of the sequence.

        Raises AssertionError if any codon follows a stop codon. A triplet
        absent from the code table fails on lookup (KeyError for a dict).
        """
        if code:
            # Overlay the caller's changes on a copy, so that the shared
            # standard table is never mutated.
            merged = dict(standard_dna_code)
            merged.update(code)
            code = merged
        else:
            code = standard_dna_code
        result = self.types.protein.Sequence(
            [],
            name=name or self.name + "_tr",
            description=description or self.description + " translated",
            source=source or self.source
        )
        seen_stop = False
        # zip stops at the shortest slice, dropping an incomplete last codon.
        for a, b, c in zip(self[::3], self[1::3], self[2::3]):
            # Explicit raise instead of `assert`: the check validates input
            # data and must not disappear when Python runs with -O.
            if seen_stop:
                raise AssertionError("Stop-codon must be the last one")
            triplet = a.code1 + b.code1 + c.code1
            aa_name = code[triplet]
            if aa_name == "Stop":
                seen_stop = True
                continue
            result.append_monomer(name=aa_name)
        return result
dendik@1100
|
73 |
dendik@931
|
class Column(base.Column):
    """Alignment column specialised for DNA."""

    # The `dna` module imported by this file; used to reach sibling classes.
    types = dna
dendik@931
|
76 |
dendik@382
|
class Alignment(base.Alignment):
    """Alignment specialised for DNA."""

    # The `dna` module imported by this file; used to reach sibling classes.
    types = dna
bnagaev@240
|
79 |
me@341
|
class Block(Alignment, base.Block):
    """Block combining the DNA `Alignment` with `base.Block`; adds nothing."""
me@341
|
82 |
me@280
|
83 # vim: set ts=4 sts=4 sw=4 et: |