Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/a6d287f9c901/lib/block.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 17:56:52 2013
Кодировка:
allpy: a6d287f9c901 lib/block.py

allpy

view lib/block.py @ 129:a6d287f9c901

broken: lib::block::geometrical_cores returns list of blocks added new parameters to geometrical_core test passed
author boris <bnagaev@gmail.com>
date Sun, 24 Oct 2010 00:26:18 +0400
parents 3e69e4b38017
children 89de20cda19c
line source
1 #!usr/bin/python
3 import sys
5 import project
6 import sequence
7 import monomer
8 import config
9 from graph import Graph
10 from copy import copy
12 class Block(object):
13 """
14 Mandatory data:
15 * self.project -- project object, which the block belongs to
16 * self.sequences - set of sequence objects that contain monomers
17 and/or gaps, that constitute the block
18 * self.positions -- sorted list of positions of the project.alignment that
19 are included in the block
21 How to create a new block:
22 >>> import project
23 >>> import block
24 >>> proj = project.Project(open("test.fasta"))
25 >>> block1 = block.Block(proj)
26 """
28 def __init__(self, project, sequences=None, positions=None):
29 """
30 Builds new block from project
31 if sequences==None, all sequences are used
32 if positions==None, all positions are used
33 """
34 if sequences == None:
35 sequences = list(project.sequences)
36 if positions == None:
37 positions = range(len(project))
38 self.project = project
39 self.sequences = sequences
40 self.positions = positions
42 def save_fasta(self, out_file, long_line=60):
43 """
44 Saves alignment to given file in fasta-format
45 Splits long lines to substrings of length=long_line
46 To prevent this, set long_line=None
48 No changes in the names, descriptions or order of the sequences
49 are made.
50 """
51 for sequence in self.sequences:
52 out_file.write(">%(name)s %(description)s \n" % sequence.__dict__)
53 alignment_monomers = self.project.alignment[sequence]
54 block_monomers = [alignment_monomers[i] for i in self.positions]
55 string = ''.join([m.type.code1 if m else '-' for m in block_monomers])
56 if long_line:
57 for i in range(0, len(string) // long_line + 1):
58 out_file.write("%s \n" % string[i*long_line : i*long_line + long_line])
59 else:
60 out_file.write("%s \n" % string)
62 def geometrical_cores(self, max_delta=config.delta,
63 timeout=config.timeout, minsize=config.minsize,
64 ac_new_atoms=config.ac_new_atoms,
65 ac_count=config.ac_count):
66 """
67 returns length-sorted list of blocks, representing GCs
69 max_delta -- threshold of distance spreading
70 timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
71 minsize -- min size of each core
72 ac_new_atoms -- min part or new atoms in new alternative core
73 current GC is compared with each of already selected GCs
74 if difference is less then ac_new_atoms, current GC is skipped
75 difference = part of new atoms in current core
76 ac_count -- max number of cores (including main core)
78 If more than one pdb chain for some sequence provided, consider all of them
79 cost is calculated as 1 / (delta + 1)
80 delta in [0, +inf) => cost in (0, 1]
81 """
82 nodes = self.positions
83 lines = {}
84 for i in self.positions:
85 for j in self.positions:
86 if i < j:
87 distances = []
88 for sequence in self.sequences:
89 for chain in sequence.pdb_chains:
90 m1 = self.project.alignment[sequence][i]
91 m2 = self.project.alignment[sequence][j]
92 if m1 and m2:
93 ca1 = m1.pdb_residues[chain]['CA']
94 ca2 = m2.pdb_residues[chain]['CA']
95 d = ca1 - ca2 # Bio.PDB feature
96 distances.append(d)
97 if len(distances) >= 2:
98 delta = max(distances) - min(distances)
99 if delta <= max_delta:
100 lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
101 graph = Graph(nodes, lines)
102 cliques = graph.cliques(timeout=timeout, minsize=minsize)
103 GCs = []
104 for clique in cliques:
105 for GC in GCs:
106 if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
107 break
108 else:
109 GCs.append(Block(self.project, copy(self.sequences), clique))
110 if len(GCs) >= ac_count:
111 break
112 return GCs
114 def xstring(self, x='X'):
115 """
116 Returns string consisting of '-' and chars x at self.positions
117 Length of returning string = length of project
118 """
119 monomers = [False] * len(self.project)
120 for i in self.positions:
121 monomers[i] = True
122 return ''.join([x if m else '-' for m in monomers])