allpy: 57894fcb7792 allpy/pdb.py

allpy

view allpy/pdb.py @ 368:57894fcb7792

Block.flush_left() -> Alignment.flush("left"); implemented flushing left, right and center. (see #25)

author	Daniil Alexeyevsky <me.dendik@gmail.com>
date	Wed, 26 Jan 2011 21:17:01 +0300
parents	3347b5f31477
children	d7fc6865ce58

line source

1 """ Functions to get pdb information from fasta id

2 and to generate fasta id from pdb information

4 pdb information: code, chain, model

6 TODO: same for local pdb files

7 """

9 import re

10 import os

11 import os.path

12 from tempfile import NamedTemporaryFile

13 import urllib2

15 from Bio.PDB import PDBParser

16 from Bio.PDB import Superimposer, CaPPBuilder, PDBIO

17 from Bio.PDB.DSSP import make_dssp_dict

19 import base

20 from graph import Graph

23 # for pdb-codes

24 re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I)

26 #~ # for files

27 #~ re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$")

29 def std_id(pdb_id, pdb_chain, pdb_model=None):

30 if pdb_model:

31 return "%s_%s_%s" % \

32 (pdb_id.lower().strip(), pdb_chain.upper().strip(), pdb_model)

33 else:

34 return "%s_%s" % \

35 (pdb_id.lower().strip(), pdb_chain.upper().strip())

37 def pdb_id_parse(ID):

38 match = re1.search(ID)

39 if not match:

40 return None

41 d = match.groupdict()

42 if 'chain' not in d or not d['chain']:

43 d['chain'] = ' '

44 if 'model' not in d or not d['model']:

45 d['model'] = 0

46 return d

49 def get_structure(file, name):

50 return PDBParser().get_structure(name, file)

52 #~ def std_id_parse(ID):

53 #~ """

54 #~ Parse standart ID to pdb_code, chain and model

55 #~ """

56 #~ if '.ent' in ID.lower() or '.pdb' in ID.lower():

57 #~ # it is file

58 #~ parseO = self.re2.search(ID) # files

59 #~ else:

60 #~ parseO = self.re1.search(ID.lower()) # pdb codes

61 #~ if not parseO:

62 #~ return None

63 #~ parse = parseO.groups()

64 #~ if len(parse) < 2:

65 #~ return None

66 #~ code = parse[1]

67 #~ chain = ''

68 #~ model = None

69 #~ if len(parse) >= 4:

70 #~ chain = parse[3]

71 #~ if chain:

72 #~ chain = chain.upper()

73 #~ if len(parse) >= 6:

74 #~ if parse[5]:

75 #~ model = parse[5]

76 #~ return code, chain, model

78 class SequenceMixin(base.Sequence):

79 """Mixin for adding PDB data to a Sequence.

81 Please note: since this is a mixin, no objects of this class should be

82 created. This class is intended to be subclassed together with one of

83 Sequence classes.

85 Attributes:

87 * pdb_chain -- Bio.PDB.Chain

88 * pdb_file -- file object

90 * pdb_residues -- {Monomer: Bio.PDB.Residue}

91 * pdb_secstr -- {Monomer: 'Secondary structure'}

92 Code Secondary structure

93 H alpha-helix

94 B Isolated beta-bridge residue

95 E Strand

96 G 3-10 helix

97 I pi-helix

98 T Turn

99 S Bend

100 - Other

101

102

103 ?TODO: global pdb_structures

104 """

105

106 def __init__(self, *args, **kw):

107 self.pdb_chains = []

108 self.pdb_files = {}

109 self.pdb_residues = {}

110 self.pdb_secstr = {}

111

112 def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):

113 """ Reads Pdb chain from file

114

115 and align each Monomer with PDB.Residue (TODO)

116 """

117 name = std_id(pdb_id, pdb_chain, pdb_model)

118 structure = get_structure(pdb_file, name)

119 chain = structure[pdb_model][pdb_chain]

120 self.pdb_chains.append(chain)

121 self.pdb_residues[chain] = {}

122 self.pdb_secstr[chain] = {}

123 pdb_sequence = Sequence.from_pdb_chain(chain)

124 a = alignment.Alignment.from_sequences(self, pdb_sequence)

125 a.muscle_align()

126 for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):

127 if pdb_sequence.pdb_has(chain, pdb_monomer):

128 residue = pdb_sequence.pdb_residues[chain][pdb_monomer]

129 self.pdb_residues[chain][monomer] = residue

130 self.pdb_files[chain] = pdb_file

131

132 def pdb_unload(self):

133 """ Delete all pdb-connected links """

134 #~ gc.get_referrers(self.pdb_chains[0])

135 self.pdb_chains = []

136 self.pdb_residues = {}

137 self.pdb_secstr = {} # FIXME

138 self.pdb_files = {} # FIXME

139

140 @staticmethod

141 def from_pdb_chain(chain):

142 """ Returns Sequence with Monomers with link to Bio.PDB.Residue

143

144 chain is Bio.PDB.Chain

145 """

146 cappbuilder = CaPPBuilder()

147 peptides = cappbuilder.build_peptides(chain)

148 sequence = Sequence()

149 sequence.pdb_chains = [chain]

150 sequence.pdb_residues[chain] = {}

151 sequence.pdb_secstr[chain] = {}

152 for peptide in peptides:

153 for ca_atom in peptide.get_ca_list():

154 residue = ca_atom.get_parent()

155 monomer = AminoAcidType.from_pdb_residue(residue).instance()

156 sequence.pdb_residues[chain][monomer] = residue

157 sequence.monomers.append(monomer)

158 return sequence

159

160 def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):

161 """ Adds pdb information to each monomer

162

163 Returns if information has been successfully added

164 TODO: conformity_file

165

166 id-format lava flow

167 """

168 if not conformity_info:

169 path = os.path.join(pdb_directory, self.name)

170 if os.path.exists(path) and os.path.getsize(path):

171 match = pdb_id_parse(self.name)

172 self.pdb_chain_add(open(path), match['code'],

173 match['chain'], match['model'])

174 else:

175 match = pdb_id_parse(self.name)

176 if match:

177 code = match['code']

178 pdb_filename = config.pdb_dir % code

179 if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):

180 url = config.pdb_url % code

181 print "Download %s" % url

182 pdb_file = open(pdb_filename, 'w')

183 data = urllib2.urlopen(url).read()

184 pdb_file.write(data)

185 pdb_file.close()

186 print "Save %s" % pdb_filename

187 pdb_file = open(pdb_filename)

188 self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])

189

190 def pdb_save(self, out_filename, pdb_chain):

191 """ Saves pdb_chain to out_file """

192 class GlySelect(Select):

193 def accept_chain(self, chain):

194 if chain == pdb_chain:

195 return 1

196 else:

197 return 0

198 io = PDBIO()

199 structure = chain.get_parent()

200 io.set_structure(structure)

201 io.save(out_filename, GlySelect())

202

203

204 def pdb_add_sec_str(self, pdb_chain):

205 """ Add secondary structure data """

206 tmp_file = NamedTemporaryFile(delete=False)

207 tmp_file.close()

208 pdb_file = self.pdb_files[pdb_chain].name

209 os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})

210 dssp, keys = make_dssp_dict(tmp_file.name)

211 for monomer in self.monomers:

212 if self.pdb_has(pdb_chain, monomer):

213 residue = self.pdb_residues[pdb_chain][monomer]

214 try:

215 d = dssp[(pdb_chain.get_id(), residue.get_id())]

216 self.pdb_secstr[pdb_chain][monomer] = d[1]

217 except:

218 print "No dssp information about %s at %s" % (monomer, pdb_chain)

219 os.unlink(tmp_file.name)

220

221 def pdb_has(self, chain, monomer):

222 return chain in self.pdb_residues and monomer in self.pdb_residues[chain]

223

224 def secstr_has(self, chain, monomer):

225 return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]

226

227

228 class AlignmentMixin(base.Alignment):

229 """Mixin to add 3D properties to alignments.

230

231 Please note: since this is a mixin, no objects of this class should be

232 created. This class is intended to be subclassed together with one of

233 Alignment classes.

234 """

235

236 def secstr(self, sequence, pdb_chain, gap='-'):

237 """ Returns string representing secondary structure """

238 return ''.join([

239 (sequence.pdb_secstr[pdb_chain][m] if sequence.secstr_has(pdb_chain, m) else gap)

240 for m in self.body[sequence]])

241

242 class BlockMixin(base.Block):

243 """Mixin to add 3D properties to blocks.

244

245 Please note: since this is a mixin, no objects of this class should be

246 created. This class is intended to be subclassed together with one of

247 Block classes.

248 """

249

250 def geometrical_cores(self, max_delta=config.delta,

251 timeout=config.timeout, minsize=config.minsize,

252 ac_new_atoms=config.ac_new_atoms,

253 ac_count=config.ac_count):

254 """ Returns length-sorted list of blocks, representing GCs

255

256 * max_delta -- threshold of distance spreading

257 * timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)

258 * minsize -- min size of each core

259 * ac_new_atoms -- min part or new atoms in new alternative core

260 current GC is compared with each of already selected GCs if

261 difference is less then ac_new_atoms, current GC is skipped

262 difference = part of new atoms in current core

263 * ac_count -- max number of cores (including main core)

264 -1 means infinity

265

266 If more than one pdb chain for some sequence provided, consider all of them

267 cost is calculated as 1 / (delta + 1)

268

269 delta in [0, +inf) => cost in (0, 1]

270 """

271 nodes = self.positions

272 lines = {}

273 for i in self.positions:

274 for j in self.positions:

275 if i < j:

276 distances = []

277 for sequence in self.sequences:

278 for chain in sequence.pdb_chains:

279 m1 = self.alignment.body[sequence][i]

280 m2 = self.alignment.body[sequence][j]

281 if m1 and m2:

282 r1 = sequence.pdb_residues[chain][m1]

283 r2 = sequence.pdb_residues[chain][m2]

284 ca1 = r1['CA']

285 ca2 = r2['CA']

286 d = ca1 - ca2 # Bio.PDB feature

287 distances.append(d)

288 if len(distances) >= 2:

289 delta = max(distances) - min(distances)

290 if delta <= max_delta:

291 lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)

292 graph = Graph(nodes, lines)

293 cliques = graph.cliques(timeout=timeout, minsize=minsize)

294 GCs = []

295 for clique in cliques:

296 for GC in GCs:

297 if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):

298 break

299

300 def ca_atoms(self, sequence, pdb_chain):

301 """ Iterates Ca-atom of monomers of this sequence from this block """

302 return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers())

303

304 def sequences_chains(self):

305 """ Iterates pairs (sequence, chain) """

306 for sequence in self.alignment.sequences:

307 if sequence in self.sequences:

308 for chain in sequence.pdb_chains:

309 yield (sequence, chain)

310

311 def superimpose(self):

312 """ Superimpose all pdb_chains in this block """

313 sequences_chains = list(self.sequences_chains())

314 if len(sequences_chains) >= 1:

315 sup = Superimposer()

316 fixed_sequence, fixed_chain = sequences_chains.pop()

317 fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)

318 for sequence, chain in sequences_chains:

319 moving_atoms = self.ca_atoms(sequence, chain)

320 sup.set_atoms(fixed_atoms, moving_atoms)

321 # Apply rotation/translation to the moving atoms

322 sup.apply(moving_atoms)

323

324 def pdb_save(self, out_file):

325 """ Save all sequences

326

327 Returns {(sequence, chain): CHAIN}

328 CHAIN is chain letter in new file

329 """

330 tmp_file = NamedTemporaryFile(delete=False)

331 tmp_file.close()

332

333 for sequence, chain in self.sequences_chains():

334 sequence.pdb_save(tmp_file.name, chain)

335 # TODO: read from tmp_file.name

336 # change CHAIN

337 # add to out_file

338

339 os.unlink(NamedTemporaryFile)

340

341 # vim: set ts=4 sts=4 sw=4 et: