Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/56bea23d3c73/lib/block.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 17:57:32 2013
Кодировка:
allpy: 56bea23d3c73 lib/block.py

allpy

view lib/block.py @ 184:56bea23d3c73

repeats: draft is completed, not tested TODO: take not exactly noticed repeats, but also part of environment (for muscle alignment)
author boris (netbook) <bnagaev@gmail.com>
date Wed, 10 Nov 2010 12:01:26 +0300
parents 45573ee48844
children
line source
1 #!/usr/bin/python
3 import sys
5 import alignment
6 import sequence
7 import monomer
8 import config
9 from graph import Graph
10 from Bio.PDB import Superimposer
11 from tempfile import NamedTemporaryFile
12 import os
13 from fasta import save_fasta
class Block(object):
    """ Block of alignment

    Mandatory data:

    *   self.alignment -- alignment object, which the block belongs to
    *   self.sequences -- set of sequence objects that contain monomers
        and/or gaps, that constitute the block
    *   self.positions -- list of positions of the alignment.body that
        are included in the block; position[i+1] is always to the right
        from position[i]

    Don't change self.sequences -- it may be a link to other block.sequences

    How to create a new block:
    >>> import alignment
    >>> import block
    >>> proj = alignment.Alignment(open("test.fasta"))
    >>> block1 = block.Block(proj)
    """

    def __init__(self, alignment, sequences=None, positions=None):
        """ Builds new block from alignment

        alignment -- alignment object the block belongs to
        sequences -- collection of sequences of the block;
            if None, all sequences of the alignment are used (as a new set)
        positions -- list of alignment positions of the block;
            if None, all positions of the alignment are used
        """
        if sequences is None:
            sequences = set(alignment.sequences) # copy
        if positions is None:
            # list() keeps self.positions a real list on any Python version
            positions = list(range(len(alignment)))
        self.alignment = alignment
        self.sequences = sequences
        self.positions = positions

    def _ordered_sequences(self):
        """ Returns sequences of the block in the order of the alignment """
        return [sequence for sequence in self.alignment.sequences
                if sequence in self.sequences]

    def save_fasta(self, out_file, long_line=70, gap='-'):
        """ Saves alignment to given file in fasta-format

        out_file -- file object passed through to fasta.save_fasta
        long_line -- passed through to fasta.save_fasta
        gap -- character written for positions holding no monomer

        No changes in the names, descriptions or order of the sequences
        are made.
        """
        # self.sequences is a set, so walk the alignment to keep its order
        for sequence in self._ordered_sequences():
            string = ''.join([m.type.code1 if m else gap
                              for m in self.monomers(sequence)])
            save_fasta(out_file, string, sequence.name,
                       sequence.description, long_line)

    def geometrical_cores(self, max_delta=config.delta,
                          timeout=config.timeout, minsize=config.minsize,
                          ac_new_atoms=config.ac_new_atoms,
                          ac_count=config.ac_count):
        """ Returns list of blocks, representing GCs

        Blocks come in the order graph.cliques() yields the cliques
        (presumably sorted by length -- verify against graph.Graph).

        max_delta -- threshold of distance spreading
        timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
        minsize -- min size of each core
        ac_new_atoms -- min part of new atoms in new alternative core
            current GC is compared with each of already selected GCs
            if difference is less than ac_new_atoms, current GC is skipped
            difference = part of new atoms in current core
        ac_count -- max number of cores (including main core)
            -1 means infinity

        If more than one pdb chain for some sequence provided, consider all
        of them

        cost is calculated as 1 / (delta + 1)
            delta in [0, +inf) => cost in (0, 1]
        """
        nodes = self.positions
        lines = {}
        body = self.alignment.body
        for i in self.positions:
            for j in self.positions:
                if i < j:
                    distances = []
                    for sequence in self.sequences:
                        m1 = body[sequence][i]
                        m2 = body[sequence][j]
                        if not (m1 and m2):
                            # a gap in either column: nothing to measure
                            continue
                        for chain in sequence.pdb_chains:
                            residues = sequence.pdb_residues[chain]
                            ca1 = residues[m1]['CA']
                            ca2 = residues[m2]['CA']
                            # Bio.PDB feature: atom - atom gives the distance
                            distances.append(ca1 - ca2)
                    # spreading is defined for two or more structures only
                    if len(distances) >= 2:
                        delta = max(distances) - min(distances)
                        if delta <= max_delta:
                            # the cost depends on the actual spreading
                            lines[Graph.line(i, j)] = 1.0 / (1.0 + delta)
        graph = Graph(nodes, lines)
        cliques = graph.cliques(timeout=timeout, minsize=minsize)
        GCs = []
        for clique in cliques:
            for GC in GCs:
                if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
                    # too similar to an already selected core
                    break
            else:
                # positions of a block must be an ordered list
                GCs.append(Block(self.alignment, self.sequences,
                                 sorted(clique)))
                if ac_count != -1 and len(GCs) >= ac_count:
                    break
        return GCs

    def xstring(self, x='X', gap='-'):
        """ Returns string consisting of gap chars and chars x at self.positions

        Length of returning string = length of alignment
        """
        marked = [False] * len(self.alignment)
        for i in self.positions:
            marked[i] = True
        return ''.join([x if m else gap for m in marked])

    def save_xstring(self, out_file, name, description='', x='X', gap='-',
                     long_line=70):
        """ Save xstring and name in fasta format """
        save_fasta(out_file, self.xstring(x=x, gap=gap), name, description,
                   long_line)

    def monomers(self, sequence):
        """ Iterates monomers of this sequence from this block

        Yields one item per position of the block; positions that hold
        no monomer yield whatever alignment.body stores for them.
        """
        alignment_sequence = self.alignment.body[sequence]
        return (alignment_sequence[i] for i in self.positions)

    def ca_atoms(self, sequence, pdb_chain):
        """ Iterates Ca-atom of monomers of this sequence from this block

        Positions holding no monomer (gaps) are skipped.
        """
        residues = sequence.pdb_residues[pdb_chain]
        return (residues[monomer]['CA']
                for monomer in self.monomers(sequence) if monomer)

    def sequences_chains(self):
        """ Iterates pairs (sequence, chain) """
        for sequence in self._ordered_sequences():
            for chain in sequence.pdb_chains:
                yield (sequence, chain)

    def superimpose(self):
        """ Superimpose all pdb_chains in this block

        The last (sequence, chain) pair is kept fixed, every other chain
        is fitted onto it using Ca-atoms of the columns where both chains
        have a monomer, and then moved.
        """
        sequences_chains = list(self.sequences_chains())
        if len(sequences_chains) >= 1:
            sup = Superimposer()
            fixed_sequence, fixed_chain = sequences_chains.pop()
            # materialized once: it is reused for every moving chain
            fixed_monomers = list(self.monomers(fixed_sequence))
            fixed_residues = fixed_sequence.pdb_residues[fixed_chain]
            for sequence, chain in sequences_chains:
                moving_residues = sequence.pdb_residues[chain]
                fixed_atoms = []
                moving_atoms = []
                pairs = zip(fixed_monomers, self.monomers(sequence))
                for fixed_monomer, moving_monomer in pairs:
                    # Superimposer needs atom lists paired one to one
                    if fixed_monomer and moving_monomer:
                        fixed_atoms.append(fixed_residues[fixed_monomer]['CA'])
                        moving_atoms.append(moving_residues[moving_monomer]['CA'])
                if not fixed_atoms:
                    # no common columns: nothing to fit this chain on
                    continue
                sup.set_atoms(fixed_atoms, moving_atoms)
                # Apply rotation/translation to the moving atoms
                sup.apply(list(self.ca_atoms(sequence, chain)))

    def pdb_save(self, out_file):
        """ Save all sequences

        Intended to return {(sequence, chain): CHAIN}
            CHAIN is chain letter in new file

        NOTE: merging into out_file is not implemented yet (see TODO),
        so nothing is written to out_file and None is returned.
        """
        tmp_file = NamedTemporaryFile(delete=False)
        tmp_file.close()
        try:
            for sequence, chain in self.sequences_chains():
                sequence.pdb_save(tmp_file.name, chain)
                # TODO: read from tmp_file.name
                # change CHAIN
                # add to out_file
        finally:
            # delete=False above: the temporary file is removed by hand
            os.unlink(tmp_file.name)