Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/e10310ed076c/lib/block.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 18:00:02 2013
Кодировка:
allpy: e10310ed076c lib/block.py

allpy

view lib/block.py @ 140:e10310ed076c

geometrical_core.py: run building of GCs
author boris <bnagaev@gmail.com>
date Sun, 24 Oct 2010 18:15:41 +0400
parents d2b53635be05
children a0ff92b78b6a
line source
1 #!usr/bin/python
3 import sys
5 import project
6 import sequence
7 import monomer
8 import config
9 from graph import Graph
11 class Block(object):
12 """
13 Mandatory data:
14 * self.project -- project object, which the block belongs to
15 * self.sequences - set of sequence objects that contain monomers
16 and/or gaps, that constitute the block
17 * self.positions -- sorted list of positions of the project.alignment that
18 are included in the block
20 Don't change self.sequences -- it may be a link to other block.sequences
22 How to create a new block:
23 >>> import project
24 >>> import block
25 >>> proj = project.Project(open("test.fasta"))
26 >>> block1 = block.Block(proj)
27 """
29 def __init__(self, project, sequences=None, positions=None):
30 """
31 Builds new block from project
32 if sequences==None, all sequences are used
33 if positions==None, all positions are used
34 """
35 if sequences == None:
36 sequences = set(project.sequences) # copy
37 if positions == None:
38 positions = range(len(project))
39 self.project = project
40 self.sequences = sequences
41 self.positions = positions
43 def save_fasta(self, out_file, long_line=60, gap='-'):
44 """
45 Saves alignment to given file in fasta-format
46 Splits long lines to substrings of length=long_line
47 To prevent this, set long_line=None
49 No changes in the names, descriptions or order of the sequences
50 are made.
51 """
52 for sequence in self.sequences:
53 out_file.write(">%(name)s %(description)s \n" % sequence.__dict__)
54 alignment_monomers = self.project.alignment[sequence]
55 block_monomers = [alignment_monomers[i] for i in self.positions]
56 string = ''.join([m.type.code1 if m else '-' for m in block_monomers])
57 if long_line:
58 for i in range(0, len(string) // long_line + 1):
59 out_file.write("%s \n" % string[i*long_line : i*long_line + long_line])
60 else:
61 out_file.write("%s \n" % string)
63 def geometrical_cores(self, max_delta=config.delta,
64 timeout=config.timeout, minsize=config.minsize,
65 ac_new_atoms=config.ac_new_atoms,
66 ac_count=config.ac_count):
67 """
68 returns length-sorted list of blocks, representing GCs
70 max_delta -- threshold of distance spreading
71 timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
72 minsize -- min size of each core
73 ac_new_atoms -- min part or new atoms in new alternative core
74 current GC is compared with each of already selected GCs
75 if difference is less then ac_new_atoms, current GC is skipped
76 difference = part of new atoms in current core
77 ac_count -- max number of cores (including main core)
78 -1 means infinity
79 If more than one pdb chain for some sequence provided, consider all of them
80 cost is calculated as 1 / (delta + 1)
81 delta in [0, +inf) => cost in (0, 1]
82 """
83 nodes = self.positions
84 lines = {}
85 for i in self.positions:
86 for j in self.positions:
87 if i < j:
88 distances = []
89 for sequence in self.sequences:
90 for chain in sequence.pdb_chains:
91 m1 = self.project.alignment[sequence][i]
92 m2 = self.project.alignment[sequence][j]
93 if m1 and m2:
94 ca1 = m1.pdb_residues[chain]['CA']
95 ca2 = m2.pdb_residues[chain]['CA']
96 d = ca1 - ca2 # Bio.PDB feature
97 distances.append(d)
98 if len(distances) >= 2:
99 delta = max(distances) - min(distances)
100 if delta <= max_delta:
101 lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
102 graph = Graph(nodes, lines)
103 cliques = graph.cliques(timeout=timeout, minsize=minsize)
104 GCs = []
105 for clique in cliques:
106 for GC in GCs:
107 if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
108 break
109 else:
110 GCs.append(Block(self.project, self.sequences, clique))
111 if ac_count != -1 and len(GCs) >= ac_count:
112 break
113 return GCs
115 def xstring(self, x='X', gap='-'):
116 """
117 Returns string consisting of gap chars and chars x at self.positions
118 Length of returning string = length of project
119 """
120 monomers = [False] * len(self.project)
121 for i in self.positions:
122 monomers[i] = True
123 return ''.join([x if m else gap for m in monomers])
125 def save_xstring(self, out_file, name, description='', x='X', gap='-'):
126 """
127 Save xstring and name in fasta format
128 """
129 out_file.write(">%(name)s %(description)s \n" % \
130 {'name':name, 'description':description})
132 out_file.write(">%(xstring)s \n" % {'xstring':self.xstring(x=x, gap=gap)})