Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/c84a7840f9b8/lib/block.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 17:56:45 2013
Кодировка:
allpy: c84a7840f9b8 lib/block.py

allpy

view lib/block.py @ 125:c84a7840f9b8

Backed out changeset 9f96bc38bc3d There can be more than one GC!!
author boris <bnagaev@gmail.com>
date Sat, 23 Oct 2010 23:50:50 +0400
parents 9f96bc38bc3d
children 69e1650feb6b
line source
1 #!/usr/bin/python
3 import sys
5 import project
6 import sequence
7 import monomer
8 import config
9 from graph import Graph
class Block(object):
    """
    A selection of sequences and alignment positions of a project.

    Mandatory data:
    *   self.project -- project object, which the block belongs to
    *   self.sequences -- set of sequence objects that contain monomers
        and/or gaps, that constitute the block
    *   self.positions -- sorted list of positions of the project.alignment
        that are included in the block

    How to create a new block:
    >>> import project
    >>> import block
    >>> proj = project.Project(open("test.fasta"))
    >>> block1 = block.Block(proj)
    """

    def __init__(self, project, sequences=None, positions=None):
        """
        Build a new block from project.

        project -- project object the block belongs to
        sequences -- sequences of the block; if None, project.sequences
            is used (the very same object, not a copy)
        positions -- alignment positions of the block; if None, all
            positions 0 .. len(project) - 1 are used
        """
        # Identity test: a sequence container with a custom __eq__ must not
        # be mistaken for "argument not given".
        if sequences is None:
            sequences = project.sequences
        if positions is None:
            # list() keeps the documented "list of positions" contract on
            # interpreters where range() is lazy.
            positions = list(range(len(project)))
        self.project = project
        self.sequences = sequences
        self.positions = positions

    def save_fasta(self, out_file, long_line=60):
        """
        Write the block to out_file in fasta format.

        out_file -- object with a write() method
        long_line -- maximal number of residues per output line; sequence
            strings are split into chunks of this length.
            To prevent splitting, set long_line=None

        No changes in the names, descriptions or order of the sequences
        are made. Gaps (false monomers) are written as '-'.
        When splitting is on, a sequence with no positions produces only
        its header line.
        """
        for seq in self.sequences:
            out_file.write(">%(name)s %(description)s \n" % seq.__dict__)
            row = self.project.alignment[seq]
            codes = []
            for position in self.positions:
                m = row[position]
                codes.append(m.type.code1 if m else '-')
            string = ''.join(codes)
            if long_line:
                # Step through chunk starts; this never yields an empty
                # trailing chunk when len(string) is a multiple of long_line.
                for start in range(0, len(string), long_line):
                    out_file.write("%s \n" % string[start:start + long_line])
            else:
                out_file.write("%s \n" % string)

    def geometrical_core(self, max_delta=config.delta,
                         timeout=config.timeout, minsize=config.minsize):
        """
        Find the geometrical core of the block.

        max_delta -- threshold of distance spreading: a pair of positions
            is connected if the spread (max - min) of its CA-CA distances
            over all structures does not exceed max_delta
        timeout, minsize -- passed unchanged to Graph.cliques

        If more than one pdb chain for some sequence provided, consider
        all of them.
        A pair of positions is considered only if at least two distances
        are available for it.
        cost is calculated as 1 / (delta + 1)
        delta in [0, +inf) => cost in (0, 1]

        Returns the result of Graph.cliques for the graph built on
        self.positions.
        """
        alignment = self.project.alignment
        nodes = self.positions
        lines = {}
        for i in self.positions:
            for j in self.positions:
                if i >= j:
                    continue
                distances = []
                for seq in self.sequences:
                    # Monomers do not depend on the chain: look them up once.
                    row = alignment[seq]
                    m1 = row[i]
                    m2 = row[j]
                    if not (m1 and m2):
                        # gap in at least one of the two positions
                        continue
                    for chain in seq.pdb_chains:
                        ca1 = m1.pdb_residues[chain]['CA']
                        ca2 = m2.pdb_residues[chain]['CA']
                        # Bio.PDB feature: atom - atom is the distance
                        distances.append(ca1 - ca2)
                if len(distances) < 2:
                    continue
                delta = max(distances) - min(distances)
                if delta <= max_delta:
                    # Weight by the actual spread of this pair, as the
                    # docstring states; the threshold alone would give
                    # every edge the same cost.
                    lines[Graph.line(i, j)] = 1.0 / (1.0 + delta)
        graph = Graph(nodes, lines)
        return graph.cliques(timeout=timeout, minsize=minsize)

    def xstring(self, x):
        """
        Return string consisting of '-' and chars x at self.positions.

        x -- string put at every position of the block
        Number of items in the returned string = length of project.
        """
        chars = ['-'] * len(self.project)
        for position in self.positions:
            chars[position] = x
        return ''.join(chars)