rev |
line source |
BurkovBA@0
|
1 #!/usr/bin/python |
BurkovBA@0
|
2 |
BurkovBA@0
|
3 import sys |
BurkovBA@0
|
4 |
BurkovBA@0
|
5 import project |
BurkovBA@0
|
6 import sequence |
BurkovBA@0
|
7 import monomer |
bnagaev@116
|
8 import config |
bnagaev@121
|
9 from graph import Graph |
BurkovBA@0
|
10 |
BurkovBA@0
|
11 class Block(object): |
BurkovBA@0
|
12 """ |
BurkovBA@0
|
13 Mandatory data: |
BurkovBA@0
|
14 * self.project -- project object, which the block belongs to |
BurkovBA@1
|
15 * self.sequences - set of sequence objects that contain monomers |
BurkovBA@0
|
16 and/or gaps, that constitute the block |
bnagaev@115
|
17 * self.positions -- sorted list of positions of the project.alignment that |
BurkovBA@1
|
18 are included in the block |
bnagaev@116
|
19 |
bnagaev@132
|
20 Don't change self.sequences -- it may be a link to other block.sequences |
bnagaev@132
|
21 |
BurkovBA@0
|
22 How to create a new block: |
BurkovBA@0
|
23 >>> import project |
BurkovBA@0
|
24 >>> import block |
BurkovBA@0
|
25 >>> proj = project.Project(open("test.fasta")) |
bnagaev@114
|
26 >>> block1 = block.Block(proj) |
BurkovBA@0
|
27 """ |
BurkovBA@0
|
28 |
bnagaev@112
|
def __init__(self, project, sequences=None, positions=None):
    """
    Builds new block from project

    project -- project object the block belongs to; it must provide
        `sequences` and support len()
    sequences -- sequences of the block; if None, a new set built from
        project.sequences is used
    positions -- positions of the block; if None, the list of all
        positions 0 .. len(project) - 1 is used

    Explicitly passed sequences and positions are stored as given
    (they are not copied).
    """
    # Identity test on purpose: `== None` would run a user-defined
    # __eq__, which may claim equality with None (silently discarding
    # the argument) or return a non-boolean for array-like values.
    if sequences is None:
        sequences = set(project.sequences) # copy
    if positions is None:
        # Materialise the range so that positions is a real list on
        # every Python version, as the class docstring promises.
        positions = list(range(len(project)))
    self.project = project
    self.sequences = sequences
    self.positions = positions
BurkovBA@0
|
42 |
bnagaev@137
|
def save_fasta(self, out_file, long_line=60, gap='-'):
    """
    Saves alignment to given file in fasta-format

    out_file -- object with a write() method; it receives the text
    long_line -- maximal number of characters of a sequence per line;
        longer sequences are split to substrings of length=long_line.
        To prevent this, set long_line=None
    gap -- string written for every position that holds no monomer

    For each sequence a ">name description" header is written,
    followed by the sequence lines. When long_line is set, an empty
    sequence produces the header only.

    No changes in the names, descriptions or order of the sequences
    are made.
    """
    # `seq` rather than `sequence`, so the imported `sequence` module
    # is not shadowed inside this method.
    for seq in self.sequences:
        out_file.write(">%(name)s %(description)s \n" % seq.__dict__)
        alignment_monomers = self.project.alignment[seq]
        block_monomers = [alignment_monomers[i] for i in self.positions]
        # Use the caller's gap symbol; it used to be ignored in favour
        # of a hard-coded '-'.
        string = ''.join([m.type.code1 if m else gap for m in block_monomers])
        if long_line:
            # Step by start offset: counting chunks with
            # `len // long_line + 1` emitted an extra blank line whenever
            # the length was an exact multiple of long_line.
            chunks = [string[start:start + long_line]
                      for start in range(0, len(string), long_line)]
        else:
            chunks = [string]
        for chunk in chunks:
            out_file.write("%s \n" % chunk)
bnagaev@116
|
62 |
bnagaev@127
|
63 def geometrical_cores(self, max_delta=config.delta, |
bnagaev@129
|
64 timeout=config.timeout, minsize=config.minsize, |
bnagaev@129
|
65 ac_new_atoms=config.ac_new_atoms, |
bnagaev@129
|
66 ac_count=config.ac_count): |
bnagaev@116
|
67 """ |
bnagaev@129
|
68 returns length-sorted list of blocks, representing GCs |
bnagaev@126
|
69 |
bnagaev@129
|
70 max_delta -- threshold of distance spreading |
bnagaev@129
|
71 timeout -- Bron-Kerbosch timeout (then fast O(n ln n) algorithm) |
bnagaev@129
|
72 minsize -- min size of each core |
bnagaev@129
|
73 ac_new_atoms -- min part of new atoms in new alternative core |
bnagaev@129
|
74 current GC is compared with each of already selected GCs |
bnagaev@129
|
75 if difference is less than ac_new_atoms, current GC is skipped |
bnagaev@129
|
76 difference = part of new atoms in current core |
|