Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/57894fcb7792/allpy/pdb.py
Дата изменения: Unknown
Дата индексирования: Sun Feb 3 18:05:26 2013
Кодировка:
allpy: 57894fcb7792 allpy/pdb.py

allpy

view allpy/pdb.py @ 368:57894fcb7792

Block.flush_left() -> Alignment.flush("left"); implemented flushing left, right and center. (see #25)
author Daniil Alexeyevsky <me.dendik@gmail.com>
date Wed, 26 Jan 2011 21:17:01 +0300
parents 3347b5f31477
children d7fc6865ce58
line source
1 """ Functions to get pdb information from fasta id
2 and to generate fasta id from pdb information
4 pdb information: code, chain, model
6 TODO: same for local pdb files
7 """
9 import re
10 import os
11 import os.path
12 from tempfile import NamedTemporaryFile
13 import urllib2
15 from Bio.PDB import PDBParser
16 from Bio.PDB import Superimposer, CaPPBuilder, PDBIO
17 from Bio.PDB.DSSP import make_dssp_dict
19 import base
20 from graph import Graph
23 # for pdb-codes
24 re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I)
26 #~ # for files
27 #~ re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$")
29 def std_id(pdb_id, pdb_chain, pdb_model=None):
30 if pdb_model:
31 return "%s_%s_%s" % \
32 (pdb_id.lower().strip(), pdb_chain.upper().strip(), pdb_model)
33 else:
34 return "%s_%s" % \
35 (pdb_id.lower().strip(), pdb_chain.upper().strip())
37 def pdb_id_parse(ID):
38 match = re1.search(ID)
39 if not match:
40 return None
41 d = match.groupdict()
42 if 'chain' not in d or not d['chain']:
43 d['chain'] = ' '
44 if 'model' not in d or not d['model']:
45 d['model'] = 0
46 return d
49 def get_structure(file, name):
50 return PDBParser().get_structure(name, file)
52 #~ def std_id_parse(ID):
53 #~ """
54 #~ Parse standart ID to pdb_code, chain and model
55 #~ """
56 #~ if '.ent' in ID.lower() or '.pdb' in ID.lower():
57 #~ # it is file
58 #~ parseO = self.re2.search(ID) # files
59 #~ else:
60 #~ parseO = self.re1.search(ID.lower()) # pdb codes
61 #~ if not parseO:
62 #~ return None
63 #~ parse = parseO.groups()
64 #~ if len(parse) < 2:
65 #~ return None
66 #~ code = parse[1]
67 #~ chain = ''
68 #~ model = None
69 #~ if len(parse) >= 4:
70 #~ chain = parse[3]
71 #~ if chain:
72 #~ chain = chain.upper()
73 #~ if len(parse) >= 6:
74 #~ if parse[5]:
75 #~ model = parse[5]
76 #~ return code, chain, model
78 class SequenceMixin(base.Sequence):
79 """Mixin for adding PDB data to a Sequence.
81 Please note: since this is a mixin, no objects of this class should be
82 created. This class is intended to be subclassed together with one of
83 Sequence classes.
85 Attributes:
87 * pdb_chain -- Bio.PDB.Chain
88 * pdb_file -- file object
90 * pdb_residues -- {Monomer: Bio.PDB.Residue}
91 * pdb_secstr -- {Monomer: 'Secondary structure'}
92 Code Secondary structure
93 H alpha-helix
94 B Isolated beta-bridge residue
95 E Strand
96 G 3-10 helix
97 I pi-helix
98 T Turn
99 S Bend
100 - Other
103 ?TODO: global pdb_structures
104 """
106 def __init__(self, *args, **kw):
107 self.pdb_chains = []
108 self.pdb_files = {}
109 self.pdb_residues = {}
110 self.pdb_secstr = {}
112 def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
113 """ Reads Pdb chain from file
115 and align each Monomer with PDB.Residue (TODO)
116 """
117 name = std_id(pdb_id, pdb_chain, pdb_model)
118 structure = get_structure(pdb_file, name)
119 chain = structure[pdb_model][pdb_chain]
120 self.pdb_chains.append(chain)
121 self.pdb_residues[chain] = {}
122 self.pdb_secstr[chain] = {}
123 pdb_sequence = Sequence.from_pdb_chain(chain)
124 a = alignment.Alignment.from_sequences(self, pdb_sequence)
125 a.muscle_align()
126 for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):
127 if pdb_sequence.pdb_has(chain, pdb_monomer):
128 residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
129 self.pdb_residues[chain][monomer] = residue
130 self.pdb_files[chain] = pdb_file
132 def pdb_unload(self):
133 """ Delete all pdb-connected links """
134 #~ gc.get_referrers(self.pdb_chains[0])
135 self.pdb_chains = []
136 self.pdb_residues = {}
137 self.pdb_secstr = {} # FIXME
138 self.pdb_files = {} # FIXME
140 @staticmethod
141 def from_pdb_chain(chain):
142 """ Returns Sequence with Monomers with link to Bio.PDB.Residue
144 chain is Bio.PDB.Chain
145 """
146 cappbuilder = CaPPBuilder()
147 peptides = cappbuilder.build_peptides(chain)
148 sequence = Sequence()
149 sequence.pdb_chains = [chain]
150 sequence.pdb_residues[chain] = {}
151 sequence.pdb_secstr[chain] = {}
152 for peptide in peptides:
153 for ca_atom in peptide.get_ca_list():
154 residue = ca_atom.get_parent()
155 monomer = AminoAcidType.from_pdb_residue(residue).instance()
156 sequence.pdb_residues[chain][monomer] = residue
157 sequence.monomers.append(monomer)
158 return sequence
160 def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):
161 """ Adds pdb information to each monomer
163 Returns if information has been successfully added
164 TODO: conformity_file
166 id-format lava flow
167 """
168 if not conformity_info:
169 path = os.path.join(pdb_directory, self.name)
170 if os.path.exists(path) and os.path.getsize(path):
171 match = pdb_id_parse(self.name)
172 self.pdb_chain_add(open(path), match['code'],
173 match['chain'], match['model'])
174 else:
175 match = pdb_id_parse(self.name)
176 if match:
177 code = match['code']
178 pdb_filename = config.pdb_dir % code
179 if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):
180 url = config.pdb_url % code
181 print "Download %s" % url
182 pdb_file = open(pdb_filename, 'w')
183 data = urllib2.urlopen(url).read()
184 pdb_file.write(data)
185 pdb_file.close()
186 print "Save %s" % pdb_filename
187 pdb_file = open(pdb_filename)
188 self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])
190 def pdb_save(self, out_filename, pdb_chain):
191 """ Saves pdb_chain to out_file """
192 class GlySelect(Select):
193 def accept_chain(self, chain):
194 if chain == pdb_chain:
195 return 1
196 else:
197 return 0
198 io = PDBIO()
199 structure = chain.get_parent()
200 io.set_structure(structure)
201 io.save(out_filename, GlySelect())
204 def pdb_add_sec_str(self, pdb_chain):
205 """ Add secondary structure data """
206 tmp_file = NamedTemporaryFile(delete=False)
207 tmp_file.close()
208 pdb_file = self.pdb_files[pdb_chain].name
209 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
210 dssp, keys = make_dssp_dict(tmp_file.name)
211 for monomer in self.monomers:
212 if self.pdb_has(pdb_chain, monomer):
213 residue = self.pdb_residues[pdb_chain][monomer]
214 try:
215 d = dssp[(pdb_chain.get_id(), residue.get_id())]
216 self.pdb_secstr[pdb_chain][monomer] = d[1]
217 except:
218 print "No dssp information about %s at %s" % (monomer, pdb_chain)
219 os.unlink(tmp_file.name)
221 def pdb_has(self, chain, monomer):
222 return chain in self.pdb_residues and monomer in self.pdb_residues[chain]
224 def secstr_has(self, chain, monomer):
225 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]
228 class AlignmentMixin(base.Alignment):
229 """Mixin to add 3D properties to alignments.
231 Please note: since this is a mixin, no objects of this class should be
232 created. This class is intended to be subclassed together with one of
233 Alignment classes.
234 """
236 def secstr(self, sequence, pdb_chain, gap='-'):
237 """ Returns string representing secondary structure """
238 return ''.join([
239 (sequence.pdb_secstr[pdb_chain][m] if sequence.secstr_has(pdb_chain, m) else gap)
240 for m in self.body[sequence]])
242 class BlockMixin(base.Block):
243 """Mixin to add 3D properties to blocks.
245 Please note: since this is a mixin, no objects of this class should be
246 created. This class is intended to be subclassed together with one of
247 Block classes.
248 """
250 def geometrical_cores(self, max_delta=config.delta,
251 timeout=config.timeout, minsize=config.minsize,
252 ac_new_atoms=config.ac_new_atoms,
253 ac_count=config.ac_count):
254 """ Returns length-sorted list of blocks, representing GCs
256 * max_delta -- threshold of distance spreading
257 * timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
258 * minsize -- min size of each core
259 * ac_new_atoms -- min part or new atoms in new alternative core
260 current GC is compared with each of already selected GCs if
261 difference is less then ac_new_atoms, current GC is skipped
262 difference = part of new atoms in current core
263 * ac_count -- max number of cores (including main core)
264 -1 means infinity
266 If more than one pdb chain for some sequence provided, consider all of them
267 cost is calculated as 1 / (delta + 1)
269 delta in [0, +inf) => cost in (0, 1]
270 """
271 nodes = self.positions
272 lines = {}
273 for i in self.positions:
274 for j in self.positions:
275 if i < j:
276 distances = []
277 for sequence in self.sequences:
278 for chain in sequence.pdb_chains:
279 m1 = self.alignment.body[sequence][i]
280 m2 = self.alignment.body[sequence][j]
281 if m1 and m2:
282 r1 = sequence.pdb_residues[chain][m1]
283 r2 = sequence.pdb_residues[chain][m2]
284 ca1 = r1['CA']
285 ca2 = r2['CA']
286 d = ca1 - ca2 # Bio.PDB feature
287 distances.append(d)
288 if len(distances) >= 2:
289 delta = max(distances) - min(distances)
290 if delta <= max_delta:
291 lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
292 graph = Graph(nodes, lines)
293 cliques = graph.cliques(timeout=timeout, minsize=minsize)
294 GCs = []
295 for clique in cliques:
296 for GC in GCs:
297 if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
298 break
300 def ca_atoms(self, sequence, pdb_chain):
301 """ Iterates Ca-atom of monomers of this sequence from this block """
302 return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers())
304 def sequences_chains(self):
305 """ Iterates pairs (sequence, chain) """
306 for sequence in self.alignment.sequences:
307 if sequence in self.sequences:
308 for chain in sequence.pdb_chains:
309 yield (sequence, chain)
311 def superimpose(self):
312 """ Superimpose all pdb_chains in this block """
313 sequences_chains = list(self.sequences_chains())
314 if len(sequences_chains) >= 1:
315 sup = Superimposer()
316 fixed_sequence, fixed_chain = sequences_chains.pop()
317 fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)
318 for sequence, chain in sequences_chains:
319 moving_atoms = self.ca_atoms(sequence, chain)
320 sup.set_atoms(fixed_atoms, moving_atoms)
321 # Apply rotation/translation to the moving atoms
322 sup.apply(moving_atoms)
324 def pdb_save(self, out_file):
325 """ Save all sequences
327 Returns {(sequence, chain): CHAIN}
328 CHAIN is chain letter in new file
329 """
330 tmp_file = NamedTemporaryFile(delete=False)
331 tmp_file.close()
333 for sequence, chain in self.sequences_chains():
334 sequence.pdb_save(tmp_file.name, chain)
335 # TODO: read from tmp_file.name
336 # change CHAIN
337 # add to out_file
339 os.unlink(NamedTemporaryFile)
341 # vim: set ts=4 sts=4 sw=4 et: