Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/annotate/06659228cd6f/allpy/pdb.py
Дата изменения: Unknown
Дата индексирования: Sun Mar 2 02:35:27 2014
Кодировка:
allpy: allpy/pdb.py annotate

allpy

annotate allpy/pdb.py @ 386:06659228cd6f

fix bug in allpy/pdb.py
author boris <bnagaev@gmail.com>
date Wed, 02 Feb 2011 16:21:04 +0300
parents d7fc6865ce58
children
rev   line source
bnagaev@147 1 """ Functions to get pdb information from fasta id
bnagaev@138 2 and to generate fasta id from pdb information
bnagaev@138 3
bnagaev@138 4 pdb information: code, chain, model
bnagaev@138 5
bnagaev@138 6 TODO: same for local pdb files
bnagaev@138 7 """
bnagaev@138 8
bnagaev@248 9 import re
me@308 10 import os
me@308 11 import os.path
me@308 12 from tempfile import NamedTemporaryFile
me@308 13 import urllib2
me@308 14
bnagaev@248 15 from Bio.PDB import PDBParser
me@274 16 from Bio.PDB import Superimposer, CaPPBuilder, PDBIO
me@308 17 from Bio.PDB.DSSP import make_dssp_dict
me@308 18
me@308 19 import base
bnagaev@385 20 import processors
bnagaev@385 21 import config
me@314 22 from graph import Graph
bnagaev@248 23
bnagaev@248 24
bnagaev@79 25 # for pdb-codes
bnagaev@151 26 re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I)
bnagaev@79 27
bnagaev@138 28 def pdb_id_parse(ID):
bnagaev@139 29 match = re1.search(ID)
bnagaev@138 30 if not match:
bnagaev@138 31 return None
bnagaev@139 32 d = match.groupdict()
bnagaev@139 33 if 'chain' not in d or not d['chain']:
bnagaev@139 34 d['chain'] = ' '
bnagaev@139 35 if 'model' not in d or not d['model']:
bnagaev@139 36 d['model'] = 0
bnagaev@139 37 return d
me@270 38
me@270 39
bnagaev@156 40 def get_structure(file, name):
bnagaev@157 41 return PDBParser().get_structure(name, file)
me@270 42
me@345 43 class SequenceMixin(base.Sequence):
me@345 44 """Mixin for adding PDB data to a Sequence.
me@345 45
me@345 46 Please note: since this is a mixin, no objects of this class should be
me@345 47 created. This class is intended to be subclassed together with one of
me@345 48 Sequence classes.
me@274 49
me@274 50 Attributes:
me@274 51
me@274 52 * pdb_chain -- Bio.PDB.Chain
me@274 53 * pdb_residues -- {Monomer: Bio.PDB.Residue}
me@274 54
me@274 55 ?TODO: global pdb_structures
me@274 56 """
me@274 57
bnagaev@385 58 def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain='A', pdb_model=0):
me@274 59 """ Reads Pdb chain from file
me@274 60
me@274 61 and align each Monomer with PDB.Residue (TODO)
me@274 62 """
bnagaev@385 63 structure = get_structure(pdb_file, self.name)
me@274 64 chain = structure[pdb_model][pdb_chain]
bnagaev@385 65 self.pdb_chain = chain
bnagaev@385 66 self.pdb_residues = {}
bnagaev@385 67 pdb_sequence = self.__class__.from_pdb_chain(chain)
bnagaev@385 68 Alignment = self.types.Alignment
bnagaev@385 69 a = Alignment()
bnagaev@385 70 a.append_sequence(self)
bnagaev@385 71 a.append_sequence(pdb_sequence)
bnagaev@385 72 a.process(processors.Muscle())
bnagaev@385 73 for monomer, pdb_monomer in a.columns_as_lists():
bnagaev@385 74 residue = pdb_sequence.pdb_residues[pdb_monomer]
bnagaev@385 75 self.pdb_residues[monomer] = residue
me@274 76
me@274 77 def pdb_unload(self):
me@274 78 """ Delete all pdb-connected links """
bnagaev@385 79 del self.pdb_chain
bnagaev@385 80 del self.pdb_residues
me@274 81
bnagaev@385 82 @classmethod
bnagaev@385 83 def from_pdb_chain(cls, chain):
me@274 84 """ Returns Sequence with Monomers with link to Bio.PDB.Residue
me@274 85
me@274 86 chain is Bio.PDB.Chain
me@274 87 """
me@274 88 cappbuilder = CaPPBuilder()
me@274 89 peptides = cappbuilder.build_peptides(chain)
bnagaev@385 90 Sequence = cls
bnagaev@385 91 Monomer = Sequence.types.Monomer
me@274 92 sequence = Sequence()
bnagaev@385 93 sequence.pdb_chain = chain
bnagaev@385 94 sequence.pdb_residues = {}
me@274 95 for peptide in peptides:
me@274 96 for ca_atom in peptide.get_ca_list():
me@274 97 residue = ca_atom.get_parent()
bnagaev@385 98 monomer = Monomer.from_code3(residue.get_resname())
bnagaev@385 99 sequence.pdb_residues[monomer] = residue
bnagaev@385 100 sequence.append(monomer)
me@274 101 return sequence
me@274 102
bnagaev@385 103 def auto_pdb(self, conformity=None, dir='/tmp', mask='%s.pdb'):
me@274 104 """ Adds pdb information to each monomer
me@274 105
me@274 106 Returns if information has been successfully added
me@274 107 TODO: conformity_file
me@274 108
me@274 109 id-format lava flow
me@274 110 """
bnagaev@385 111 match = pdb_id_parse(self.name)
bnagaev@385 112 if not match:
bnagaev@385 113 return False
bnagaev@385 114 code = match['code']
bnagaev@385 115 chain = match['chain']
bnagaev@385 116 model = match['model']
bnagaev@385 117 user_path = os.path.join(dir, mask % self.name)
bnagaev@385 118 path = user_path
bnagaev@385 119 if not os.path.exists(path) or not os.path.getsize(path):
bnagaev@385 120 path = config.pdb_path % code
bnagaev@385 121 if not os.path.exists(path) or not os.path.getsize(path):
bnagaev@385 122 url = config.pdb_url % code
bnagaev@385 123 print "Download %s" % url
bnagaev@385 124 path = user_path
bnagaev@385 125 data = urllib2.urlopen(url).read()
bnagaev@385 126 pdb_file = open(path, 'w')
bnagaev@385 127 pdb_file.write(data)
bnagaev@385 128 pdb_file.close()
bnagaev@385 129 print "Save %s" % path
bnagaev@385 130 self.set_pdb_chain(open(path), code, chain, model)
bnagaev@385 131 return True
me@274 132
me@274 133 def pdb_save(self, out_filename, pdb_chain):
me@274 134 """ Saves pdb_chain to out_file """
bnagaev@385 135 class MySelect(Select):
me@274 136 def accept_chain(self, chain):
me@274 137 if chain == pdb_chain:
me@274 138 return 1
me@274 139 else:
me@274 140 return 0
me@274 141 io = PDBIO()
me@274 142 structure = chain.get_parent()
me@274 143 io.set_structure(structure)
bnagaev@385 144 io.save(out_filename, MySelect())
me@274 145
bnagaev@385 146 def ca_atoms(self, sequence):
bnagaev@385 147 """ Iterates Ca-atom of monomers of this sequence """
bnagaev@385 148 return (pdb_residues[monomer]['CA'] for monomer in self)
me@296 149
me@345 150 class AlignmentMixin(base.Alignment):
me@345 151 """Mixin to add 3D properties to alignments.
me@345 152
me@345 153 Please note: since this is a mixin, no objects of this class should be
me@345 154 created. This class is intended to be subclassed together with one of
me@345 155 Alignment classes.
me@345 156 """
bnagaev@385 157 pass
me@296 158
me@345 159 class BlockMixin(base.Block):
me@345 160 """Mixin to add 3D properties to blocks.
me@345 161
me@345 162 Please note: since this is a mixin, no objects of this class should be
me@345 163 created. This class is intended to be subclassed together with one of
me@345 164 Block classes.
me@345 165 """
me@308 166
me@309 167 def geometrical_cores(self, max_delta=config.delta,
me@309 168 timeout=config.timeout, minsize=config.minsize,
bnagaev@385 169 ac_new_atoms=config.ac_new_atoms, ac_count=config.ac_count):
bnagaev@385 170 """ Returns length-sorted list of GCs
bnagaev@385 171 GC is set of columns
me@309 172
me@309 173 * max_delta -- threshold of distance spreading
me@309 174 * timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
me@309 175 * minsize -- min size of each core
me@309 176 * ac_new_atoms -- min part or new atoms in new alternative core
me@309 177 current GC is compared with each of already selected GCs if
me@309 178 difference is less then ac_new_atoms, current GC is skipped
me@309 179 difference = part of new atoms in current core
me@309 180 * ac_count -- max number of cores (including main core)
me@309 181 -1 means infinity
me@309 182
me@309 183 cost is calculated as 1 / (delta + 1)
me@309 184 delta in [0, +inf) => cost in (0, 1]
me@309 185 """
bnagaev@385 186 nodes = self.columns
me@309 187 lines = {}
bnagaev@385 188 for i, column1 in enumerate(self.columns):
bnagaev@385 189 for j, column2 in enumerate(self.columns):
me@309 190 if i < j:
me@309 191 distances = []
me@309 192 for sequence in self.sequences:
bnagaev@385 193 if sequence in column1 and sequence in column2:
bnagaev@385 194 m1 = column1[sequence]
bnagaev@385 195 m2 = column2[sequence]
bnagaev@385 196 r1 = sequence.pdb_residues[m1]
bnagaev@385 197 r2 = sequence.pdb_residues[m2]
bnagaev@385 198 ca1 = r1['CA']
bnagaev@385 199 ca2 = r2['CA']
bnagaev@385 200 d = ca1 - ca2 # Bio.PDB feature
bnagaev@385 201 distances.append(d)
me@309 202 if len(distances) >= 2:
me@309 203 delta = max(distances) - min(distances)
me@309 204 if delta <= max_delta:
bnagaev@385 205 line = Graph.line(column1, column2)
bnagaev@385 206 lines[line] = 1.0 / (1.0 + max_delta)
me@309 207 graph = Graph(nodes, lines)
me@309 208 cliques = graph.cliques(timeout=timeout, minsize=minsize)
me@309 209 GCs = []
me@309 210 for clique in cliques:
me@309 211 for GC in GCs:
bnagaev@385 212 new_nodes = clique - GC
bnagaev@386 213 if len(new_nodes) < ac_new_atoms * len(clique):
me@309 214 break
bnagaev@385 215 else:
bnagaev@385 216 GCs.append(clique)
bnagaev@385 217 return GCs
me@309 218
me@308 219
me@308 220
me@308 221 def superimpose(self):
me@308 222 """ Superimpose all pdb_chains in this block """
me@308 223 sequences_chains = list(self.sequences_chains())
me@308 224 if len(sequences_chains) >= 1:
me@308 225 sup = Superimposer()
me@308 226 fixed_sequence, fixed_chain = sequences_chains.pop()
me@308 227 fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)
me@308 228 for sequence, chain in sequences_chains:
me@308 229 moving_atoms = self.ca_atoms(sequence, chain)
me@308 230 sup.set_atoms(fixed_atoms, moving_atoms)
me@308 231 # Apply rotation/translation to the moving atoms
me@308 232 sup.apply(moving_atoms)
me@308 233
me@308 234 def pdb_save(self, out_file):
me@308 235 """ Save all sequences
me@308 236
me@308 237 Returns {(sequence, chain): CHAIN}
me@308 238 CHAIN is chain letter in new file
me@308 239 """
me@308 240 tmp_file = NamedTemporaryFile(delete=False)
me@308 241 tmp_file.close()
me@308 242
me@308 243 for sequence, chain in self.sequences_chains():
me@308 244 sequence.pdb_save(tmp_file.name, chain)
me@308 245 # TODO: read from tmp_file.name
me@308 246 # change CHAIN
me@308 247 # add to out_file
me@308 248
me@308 249 os.unlink(NamedTemporaryFile)
me@308 250
me@274 251 # vim: set ts=4 sts=4 sw=4 et: