Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/annotate/9ea5099309b8/allpy/base.py
Дата изменения: Unknown
Дата индексирования: Sun Mar 2 07:05:14 2014
Кодировка:
allpy: allpy/base.py annotate

allpy

annotate allpy/base.py @ 274:9ea5099309b8

Moved PDB parts of allpy.base.Sequence to allpy.pdb.Sequence
author Daniil Alexeyevsky <me.dendik@gmail.com>
date Wed, 15 Dec 2010 19:19:11 +0300
parents bda87d4c525e
children 4a2341bc90b1
rev   line source
me@261 1 import sys
me@261 2 import os
me@262 3 import os.path
me@261 4 from tempfile import NamedTemporaryFile
me@262 5 import urllib2
me@261 6
me@261 7 import config
me@261 8 from graph import Graph
me@262 9 from Bio.PDB.DSSP import make_dssp_dict
bnagaev@249 10 from fasta import save_fasta
me@260 11 import data.codes
me@260 12
class MonomerType(object):
    """Class of monomer types.

    Each MonomerType object represents a known monomer type, e.g. Valine,
    and is referenced by each instance of monomer in a given sequence.

    Instance attributes:

    - `name`: full name of monomer type (stored capitalized)
    - `code1`: one-letter code (stored upper-case)
    - `code3`: three-letter code (stored upper-case)
    - `is_modified`: either of True or False

    Class attributes:

    - `by_code1`: a mapping from one-letter code to MonomerType object
    - `by_code3`: a mapping from three-letter code to MonomerType object
    - `by_name`: a mapping from monomer name to MonomerType object
    - `instance_type`: class of Monomer objects to use when creating new
      objects; this must be redefined in descendent classes

    All of the class attributes MUST be redefined when subclassing;
    otherwise the subclass shares (and writes into) the registries of
    MonomerType itself.
    """

    by_code1 = {}
    by_code3 = {}
    by_name = {}
    instance_type = None

    def __init__(self, name="", code1="", code3="", is_modified=False):
        """Create a monomer type and register it in the lookup tables.

        The type is registered under its *normalised* name and codes, i.e.
        exactly the keys that `from_code1`, `from_code3` and `from_name`
        look up.  Modified monomers are not registered in `by_code1`.
        """
        self.name = name.capitalize()
        self.code1 = code1.upper()
        self.code3 = code3.upper()
        self.is_modified = bool(is_modified)
        if not self.is_modified:
            self.by_code1[self.code1] = self
        self.by_code3[self.code3] = self
        self.by_name[self.name] = self
        # We duplicate distinguished long names into MonomerType itself,
        # so that we can use MonomerType.from_code3 to create the relevant
        # type of monomer.
        MonomerType.by_code3[self.code3] = self
        MonomerType.by_name[self.name] = self

    @classmethod
    def _initialize(cls, type_letter, codes=None):
        """Create all relevant instances of MonomerType.

        `type_letter` is either of:

        - 'p' for protein
        - 'd' for DNA
        - 'r' for RNA

        `codes` is a table of monomer codes: an iterable of
        (type, code1, is_modified, code3, name) rows.  When omitted,
        `data.codes.codes` is used (looked up at call time).
        """
        if codes is None:
            codes = data.codes.codes
        for kind, code1, is_modified, code3, name in codes:
            if kind == type_letter:
                cls(name, code1, code3, is_modified)

    @classmethod
    def from_code1(cls, code1):
        """Return monomer type by one-letter code (case-insensitive)."""
        return cls.by_code1[code1.upper()]

    @classmethod
    def from_code3(cls, code3):
        """Return monomer type by three-letter code (case-insensitive)."""
        return cls.by_code3[code3.upper()]

    @classmethod
    def from_name(cls, name):
        """Return monomer type by name (case-insensitive)."""
        return cls.by_name[name.capitalize()]

    def instance(self):
        """Create a new monomer of given type.

        Raises TypeError if `instance_type` has not been redefined.
        """
        if self.instance_type is None:
            raise TypeError("%s.instance_type is not set"
                            % type(self).__name__)
        return self.instance_type(self)

    def __eq__(self, other):
        """Equal to itself and to any object whose `type` is this type."""
        if hasattr(other, "type"):
            return self is other.type
        return self is other

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        return not self.__eq__(other)

    # Keep identity-based hashing: Python 3 would otherwise drop __hash__
    # for a class that defines __eq__.
    __hash__ = object.__hash__
me@260 94
class Monomer(object):
    """Monomer object.

    attributes:

    - `type`: type of monomer (a MonomerType object)

    class attribute `monomer_type` is MonomerType or one of its subclasses;
    it is used when creating new monomers. It MUST be redefined when
    subclassing Monomer.

    Two monomers compare equal when they have the very same type object;
    hashing stays identity-based, so distinct monomers of the same type
    remain distinct dictionary keys.
    """
    monomer_type = MonomerType

    def __init__(self, type):
        self.type = type

    @classmethod
    def from_code1(cls, code1):
        """Create a monomer from a one-letter code (case-insensitive)."""
        return cls(cls.monomer_type.from_code1(code1))

    @classmethod
    def from_code3(cls, code3):
        """Create a monomer from a three-letter code (case-insensitive)."""
        return cls(cls.monomer_type.from_code3(code3))

    @classmethod
    def from_name(cls, name):
        """Create a monomer from a full monomer name (case-insensitive)."""
        return cls(cls.monomer_type.from_name(name))

    def __eq__(self, other):
        """Equal to monomers of the same type and to the type itself."""
        if hasattr(other, "type"):
            return self.type is other.type
        return self.type is other

    def __ne__(self, other):
        # Python 2 does not derive != from ==; without this, `a != b`
        # compares by identity and is True for any two distinct monomers.
        return not self.__eq__(other)

    # Monomers are used as dictionary keys by identity; keep that explicit
    # (Python 3 would otherwise make the class unhashable).
    __hash__ = object.__hash__
bnagaev@239 126
class Sequence(list):
    """Sequence of Monomers.

    This behaves like list of monomer objects. In addition to standard list
    behaviour, Sequence has the following attributes:

    * name -- str with the name of the sequence
    * description -- str with description of the sequence
    * source -- str denoting source of the sequence

    Any of them may be empty (i.e. hold empty string)

    `monomers` is kept as an alias of the sequence itself, so code written
    against the older `sequence.monomers` attribute keeps working.

    Sequences compare by content but hash by identity, so that two
    sequences with the same residues are still distinct dictionary keys.
    """

    def __init__(self, monomers=None, name='', description="", source=''):
        # Fill the underlying list, so that len(), iteration and indexing
        # all agree with each other.
        list.__init__(self, monomers or [])
        self.name = name
        self.description = description
        self.source = source

    @property
    def monomers(self):
        """The monomers of the sequence (the sequence object itself)."""
        return self

    @monomers.setter
    def monomers(self, monomers):
        self[:] = monomers

    def __str__(self):
        """ Returns sequence in one-letter code """
        return ''.join([monomer.type.code1 for monomer in self])

    def __eq__(self, other):
        """ Returns if all corresponding monomers of this sequences are equal

        If lengths of sequences are not equal, returns False
        """
        return len(self) == len(other) and \
            all([a == b for a, b in zip(self, other)])

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        """Hash sequence by identity (list itself is unhashable)."""
        return id(self)

    @classmethod
    def from_string(cls, string, name='', description=''):
        """Create sequences from string of one-letter codes.

        Uses `cls.monomer_type.from_code1` to build every monomer.
        NOTE(review): Sequence itself does not define `monomer_type`;
        presumably subclasses are expected to provide it -- confirm.
        """
        monomer = cls.monomer_type.from_code1
        monomers = [monomer(letter) for letter in string]
        return cls(monomers, name, description)

    def secstr_has(self, chain, monomer):
        """Tell whether secondary structure is known for monomer in chain.

        NOTE(review): reads `self.pdb_secstr`, which is not set by this
        class; presumably provided by a PDB-aware subclass -- confirm.
        """
        return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]

    @staticmethod
    def file_slice(file, n_from, n_to, fasta_name='', name='', description='',
                   monomer_kind=None):
        """ Build and return sequence, consisting of part of sequence from file

        Does not control gaps

        * file -- iterable of lines in fasta format
        * n_from, n_to -- zero-based, half-open range of letters to take,
          counted from the beginning of the record
        * fasta_name -- the first record whose header starts with
          '>' + fasta_name is used
        * name, description -- attributes of the returned sequence
        * monomer_kind -- object with `from_code1(letter)` returning a
          monomer type that has `instance()`; defaults to AminoAcidType,
          looked up at call time.
          NOTE(review): AminoAcidType is neither defined nor imported in
          the visible part of this module -- confirm where it comes from.
        """
        if monomer_kind is None:
            monomer_kind = AminoAcidType
        inside = False
        number_used = 0  # letters of the record consumed so far
        pieces = []
        for line in file:
            line = line.strip()
            if not inside:
                if line.startswith('>%s' % fasta_name):
                    inside = True
                continue
            if line.startswith('>'):
                # The next record begins: the requested one is over.
                break
            # Clamp at zero: negative bounds would index from the line end.
            start = max(n_from - number_used, 0)
            stop = max(n_to - number_used, 0)
            pieces.append(line[start:stop])
            number_used += len(line)
            if number_used >= n_to:
                break
        monomers = [monomer_kind.from_code1(letter).instance()
                    for letter in ''.join(pieces)]
        return Sequence(monomers, name, description)
bnagaev@243 193
class Alignment(dict):
    """ Alignment

    {<Sequence object>:[<Monomer object>,None,<Monomer object>]}

    keys are the Sequence objects, values are the lists, which
    contain monomers of those sequences or None for gaps in the
    corresponding sequence of alignment

    The data is held in two attributes rather than in the dict itself:

    * self.sequences -- list of Sequence objects (they contain no gaps)
    * self.body -- the {sequence: [monomer or None, ...]} mapping
    """

    def __init__(self, *args):
        """overloaded constructor

        Alignment() -> new empty Alignment
        Alignment(sequences, body) -> new Alignment with sequences and
            body initialized from arguments
        Alignment(fasta_file) -> new Alignment, read body and sequences
            from fasta file

        """
        if len(args) > 1:
            self.sequences = args[0]
            self.body = args[1]
        elif len(args) == 0:
            self.sequences = []
            self.body = {}
        else:
            self.sequences, self.body = Alignment.from_fasta(args[0])

    def length(self):
        """ Returns width, ie length of each sequence with gaps

        An alignment without rows has width 0.
        """
        if not self.body:
            return 0
        return max([len(line) for line in self.body.values()])

    def height(self):
        """ The number of sequences in alignment (it's thickness). """
        return len(self.body)

    @staticmethod
    def _code_at(row, position):
        """Return one-letter code at position of row, or None for a gap.

        Positions beyond the end of a (ragged) row count as gaps.
        """
        if position >= len(row):
            return None
        monomer = row[position]
        if monomer is None:
            return None
        return monomer.type.code1

    def identity(self):
        """ Calculate the identity of alignment positions for colouring.

        For every (row, column) in alignment the fraction of rows holding
        exactly the same residue in the same column is calculated.
        The data structure is just like the Alignment.body, but instead of
        monomers it contains floats (None for gaps).  The result is also
        stored in self.identity_percentages.

        Every non-gap monomer is counted, whatever its one-letter code.
        """
        self.identity_percentages = {}
        if not self.sequences:
            return self.identity_percentages
        width = self.length()
        # First, calculate fractions of every residue in every column
        contribution = 1.0 / len(self.sequences)
        all_columns = []
        for position in range(width):
            column_percentage = {}
            for row in self.body.values():
                aa = self._code_at(row, position)
                if aa is not None:
                    column_percentage[aa] = \
                        column_percentage.get(aa, 0.0) + contribution
            all_columns.append(column_percentage)
        # Second, map these fractions onto the alignment
        for seq in self.sequences:
            row = self.body[seq]
            self.identity_percentages[seq] = [
                all_columns[position].get(self._code_at(row, position))
                for position in range(width)]
        return self.identity_percentages

    @staticmethod
    def from_fasta(file, monomer_kind=None):
        """ Import data from fasta file

        file is an open file-like object (only read() and, for error
        messages, an optional `name` attribute are used)

        monomer_kind is class, inherited from MonomerType; defaults to
        AminoAcidType, looked up at call time.
        NOTE(review): AminoAcidType is neither defined nor imported in
        the visible part of this module -- confirm where it comes from.

        Returns (sequences, body).  Raises Exception on malformed input.

        >>> sequences, body = Alignment.from_fasta(open("test.fasta"))
        """
        import re

        if monomer_kind is None:
            monomer_kind = AminoAcidType
        file_name = getattr(file, 'name', '<unknown>')

        sequences = []
        body = {}

        raw_sequences = file.read().split(">")
        if len(raw_sequences) <= 1:
            raise Exception("Wrong format of fasta-file %s" % file_name)

        # ignore everything before the first >
        for raw in raw_sequences[1:]:
            parsed_raw_sequence = [s.strip() for s in raw.split("\n")]
            # The header is "name" or "name description"; split() always
            # yields at least one item, so the name is always defined.
            name_and_description = parsed_raw_sequence[0].split(" ", 1)
            name = name_and_description[0]
            if len(name_and_description) == 2:
                description = name_and_description[1]
            else:
                description = ''

            if len(parsed_raw_sequence) <= 1:
                raise Exception(
                    "Wrong format of sequence %(name)s fasta-file %(file)s"
                    % {'name': name, 'file': file_name})
            string = "".join([re.sub(r"\s", "", piece)
                              for piece in parsed_raw_sequence[1:]])
            monomers = []   # the sequence itself, without gaps
            body_list = []  # the respective row of body, with gaps
            for current_monomer in string:
                if current_monomer not in ["-", ".", "~"]:
                    monomers.append(
                        monomer_kind.from_code1(current_monomer).instance())
                    body_list.append(monomers[-1])
                else:
                    body_list.append(None)
            s = Sequence(monomers, name, description)
            sequences.append(s)
            body[s] = body_list
        return sequences, body

    @staticmethod
    def from_sequences(*sequences):
        """ Constructs new alignment from sequences

        Add None's to right end to make equal lengthes of alignment sequences
        """
        alignment = Alignment()
        alignment.sequences = list(sequences)
        if not sequences:
            return alignment
        max_length = max([len(sequence) for sequence in sequences])
        for sequence in sequences:
            gaps_count = max_length - len(sequence)
            alignment.body[sequence] = \
                list(sequence.monomers) + [None] * gaps_count
        return alignment

    def save_fasta(self, out_file, long_line=70, gap='-'):
        """ Saves alignment to given file

        Splits long lines to substrings of length=long_line
        To prevent this, set long_line=None
        """
        Block(self).save_fasta(out_file, long_line=long_line, gap=gap)

    def muscle_align(self):
        """ Simple align ths alignment using sequences (muscle)

        uses old Monomers and Sequences objects

        The alignment is written to a temporary fasta file, the external
        `muscle` program is run on it, and the rows of self.body are
        rebuilt from its output.  self.body is only replaced once every
        sequence has been matched.  Raises Exception if muscle fails or
        its output does not match the sequences of this alignment.
        """
        import subprocess

        tmp_file = NamedTemporaryFile(delete=False)
        try:
            self.save_fasta(tmp_file)
            tmp_file.close()
            status = subprocess.call(
                ["muscle", "-in", tmp_file.name, "-out", tmp_file.name])
            if status != 0:
                raise Exception("Align: muscle exited with status %s"
                                % status)
            aligned_file = open(tmp_file.name)
            try:
                sequences, body = Alignment.from_fasta(aligned_file)
            finally:
                aligned_file.close()
        finally:
            tmp_file.close()
            os.unlink(tmp_file.name)

        new_body = {}
        for sequence in self.sequences:
            matching = [i for i in sequences if sequence == i]
            if not matching:
                raise Exception(
                    "Align: Cann't find sequence %s in muscle output"
                    % sequence.name)
            new_sequence = matching[0]
            old_monomers = iter(sequence.monomers)
            row = []
            for monomer in body[new_sequence]:
                if monomer is None:
                    row.append(None)
                else:
                    old_monomer = next(old_monomers)
                    # `not ==` rather than `!=`: only __eq__ is guaranteed
                    # to be defined by monomer classes.
                    if not (monomer == old_monomer):
                        raise Exception("Align: alignment errors")
                    row.append(old_monomer)
            new_body[sequence] = row
        self.body.update(new_body)

    def column(self, sequence=None, sequences=None, original=None):
        """ returns list of columns of alignment

        sequence or sequences:
            if sequence is given, then column is (original_monomer, monomer)
            if sequences is given, then column is
                (original_monomer, {sequence: monomer})
            if both of them are given, it is an error
            if none of them is given, all sequences of alignment are used
        original (Sequence type):
            if given, this filters only columns represented by original
            sequence; if not given, original_monomer is None
        """
        if sequence is not None and sequences is not None:
            raise Exception("Wrong usage. read help")
        if sequence is None and sequences is None:
            sequences = self.sequences
        indexes = dict([(v, k) for (k, v) in enumerate(self.sequences)])
        rows = [self.body[seq] for seq in self.sequences]
        for column in zip(*rows):
            original_monomer = None
            if original is not None:
                original_monomer = column[indexes[original]]
                if original_monomer is None:
                    continue
            if sequence is not None:
                yield (original_monomer, column[indexes[sequence]])
            else:
                yield (original_monomer,
                       dict([(s, column[indexes[s]]) for s in sequences]))

    def secstr(self, sequence, pdb_chain, gap='-'):
        """ Returns string representing secondary structure

        One character per alignment position of sequence; gap is used
        wherever sequence.secstr_has() reports nothing for the position.
        """
        return ''.join([
            (sequence.pdb_secstr[pdb_chain][m]
             if sequence.secstr_has(pdb_chain, m) else gap)
            for m in self.body[sequence]])
bnagaev@249 405
class Block(object):
    """ Block of alignment

    Mandatory data:

    * self.alignment -- alignment object, which the block belongs to
    * self.sequences - set of sequence objects that contain monomers
        and/or gaps, that constitute the block
    * self.positions -- list of positions of the alignment.body that
        are included in the block; position[i+1] is always to the right
        from position[i]

    Don't change self.sequences -- it may be a link to other block.sequences

    How to create a new block:
    >>> proj = Alignment(open("test.fasta"))
    >>> block1 = Block(proj)
    """

    def __init__(self, alignment, sequences=None, positions=None):
        """ Builds new block from alignment

        if sequences==None, all sequences are used
        if positions==None, all positions are used
        """
        if sequences is None:
            sequences = set(alignment.sequences)  # copy
        if positions is None:
            # The width of the alignment; len(alignment) would be the
            # size of the (unused) dict part of the alignment object.
            positions = list(range(alignment.length()))
        self.alignment = alignment
        self.sequences = sequences
        self.positions = positions

    def _ordered_sequences(self):
        """Return sequences of the block in the order of the alignment."""
        return [sequence for sequence in self.alignment.sequences
                if sequence in self.sequences]

    def save_fasta(self, out_file, long_line=70, gap='-'):
        """ Saves alignment to given file in fasta-format

        No changes in the names, descriptions or order of the sequences
        are made.  Gaps are written as the gap character.
        """
        for sequence in self._ordered_sequences():
            alignment_monomers = self.alignment.body[sequence]
            block_monomers = [alignment_monomers[i] for i in self.positions]
            string = ''.join([m.type.code1 if m is not None else gap
                              for m in block_monomers])
            save_fasta(out_file, string, sequence.name,
                       sequence.description, long_line)

    def _ca_distances(self, i, j):
        """Return Ca-Ca distances between positions i and j, one for each
        (sequence, chain) of the block that has monomers at both."""
        distances = []
        for sequence in self.sequences:
            row = self.alignment.body[sequence]
            m1 = row[i]
            m2 = row[j]
            if m1 is None or m2 is None:
                continue
            for chain in sequence.pdb_chains:
                ca1 = sequence.pdb_residues[chain][m1]['CA']
                ca2 = sequence.pdb_residues[chain][m2]['CA']
                distances.append(ca1 - ca2)  # Bio.PDB feature
        return distances

    def geometrical_cores(self, max_delta=None, timeout=None, minsize=None,
                          ac_new_atoms=None, ac_count=None):
        """ Returns length-sorted list of blocks, representing GCs

        max_delta -- threshold of distance spreading
        timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
        minsize -- min size of each core
        ac_new_atoms -- min part or new atoms in new alternative core
            current GC is compared with each of already selected GCs
            if difference is less then ac_new_atoms, current GC is skipped
            difference = part of new atoms in current core
        ac_count -- max number of cores (including main core)
            -1 means infinity
        Any parameter left as None is taken from the config module
        (config.delta, config.timeout, config.minsize,
        config.ac_new_atoms, config.ac_count) at call time.

        If more than one pdb chain for some sequence provided, consider
        all of them
        cost is calculated as 1 / (delta + 1)
            delta in [0, +inf) => cost in (0, 1]

        NOTE(review): the order of the result is the order in which
        Graph.cliques() yields cliques; cliques are assumed to be sets
        of positions -- confirm against the graph module.
        """
        if max_delta is None:
            max_delta = config.delta
        if timeout is None:
            timeout = config.timeout
        if minsize is None:
            minsize = config.minsize
        if ac_new_atoms is None:
            ac_new_atoms = config.ac_new_atoms
        if ac_count is None:
            ac_count = config.ac_count

        nodes = self.positions
        lines = {}
        for i in self.positions:
            for j in self.positions:
                if i < j:
                    distances = self._ca_distances(i, j)
                    if len(distances) >= 2:
                        delta = max(distances) - min(distances)
                        if delta <= max_delta:
                            lines[Graph.line(i, j)] = 1.0 / (1.0 + delta)
        graph = Graph(nodes, lines)
        cliques = graph.cliques(timeout=timeout, minsize=minsize)
        GCs = []
        for clique in cliques:
            for GC in GCs:
                if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
                    break
            else:
                # positions of a block are an ordered list
                GCs.append(Block(self.alignment, self.sequences,
                                 sorted(clique)))
                if ac_count != -1 and len(GCs) >= ac_count:
                    break
        return GCs

    def xstring(self, x='X', gap='-'):
        """ Returns string consisting of gap chars and chars x at self.positions

        Length of returning string = length of alignment
        """
        monomers = [False] * self.alignment.length()
        for i in self.positions:
            monomers[i] = True
        return ''.join([x if m else gap for m in monomers])

    def save_xstring(self, out_file, name, description='', x='X', gap='-',
                     long_line=70):
        """ Save xstring and name in fasta format """
        save_fasta(out_file, self.xstring(x=x, gap=gap), name, description,
                   long_line)

    def monomers(self, sequence):
        """ Iterates monomers of this sequence from this block

        Gaps are yielded as None.
        """
        alignment_sequence = self.alignment.body[sequence]
        return (alignment_sequence[i] for i in self.positions)

    def ca_atoms(self, sequence, pdb_chain):
        """ Iterates Ca-atom of monomers of this sequence from this block

        Gaps are skipped.
        """
        residues = sequence.pdb_residues[pdb_chain]
        return (residues[monomer]['CA']
                for monomer in self.monomers(sequence)
                if monomer is not None)

    def sequences_chains(self):
        """ Iterates pairs (sequence, chain) """
        for sequence in self.alignment.sequences:
            if sequence in self.sequences:
                for chain in sequence.pdb_chains:
                    yield (sequence, chain)

    def superimpose(self):
        """ Superimpose all pdb_chains in this block

        The last (sequence, chain) pair stays fixed; every other chain is
        fitted onto it using the Ca-atoms of those block positions where
        both chains have a monomer, and the resulting rotation/translation
        is applied to the Ca-atoms of the moving chain in this block.
        """
        sequences_chains = list(self.sequences_chains())
        if len(sequences_chains) < 1:
            return
        from Bio.PDB import Superimposer
        sup = Superimposer()
        fixed_sequence, fixed_chain = sequences_chains.pop()
        fixed_monomers = list(self.monomers(fixed_sequence))
        fixed_residues = fixed_sequence.pdb_residues[fixed_chain]
        for sequence, chain in sequences_chains:
            moving_residues = sequence.pdb_residues[chain]
            fixed_atoms = []
            moving_atoms = []
            pairs = zip(fixed_monomers, self.monomers(sequence))
            for fixed_monomer, moving_monomer in pairs:
                if fixed_monomer is not None and moving_monomer is not None:
                    fixed_atoms.append(fixed_residues[fixed_monomer]['CA'])
                    moving_atoms.append(moving_residues[moving_monomer]['CA'])
            if not fixed_atoms:
                continue  # nothing in common to fit on
            sup.set_atoms(fixed_atoms, moving_atoms)
            # Apply rotation/translation to the moving atoms
            sup.apply(list(self.ca_atoms(sequence, chain)))

    def pdb_save(self, out_file):
        """ Save all sequences

        NOTE(review): unfinished -- every chain is saved into a temporary
        file which is then removed; nothing is written to out_file and
        nothing is returned yet.  Intended result:

        Returns {(sequence, chain): CHAIN}
            CHAIN is chain letter in new file
        """
        tmp_file = NamedTemporaryFile(delete=False)
        tmp_file.close()
        try:
            for sequence, chain in self.sequences_chains():
                sequence.pdb_save(tmp_file.name, chain)
                # TODO: read from tmp_file.name
                # change CHAIN
                # add to out_file
        finally:
            os.unlink(tmp_file.name)
bnagaev@239 564
me@260 565 # vim: set ts=4 sts=4 sw=4 et: