allpy: 675b402094be lib/block.py

allpy

view lib/block.py @ 151:675b402094be

day commit -- a lot of changes fasta.py: universal save_fasta() determine_long_line -- for determine length of fasta sequence string in user input everywhere: standart long_line=60 --> 70 blocK.sequences_chains: returns sequences in order as in project added monomer pdb_secstr to store secondary structure pdb adding: some improvements and fixes fix in from_pdb_chain: use all peptides, not only first Sequence.pdb_files added to store information about pdb file for each chain dssp bindings to get secondary structure /sec_str -- tool to map secondary structure on each sequence of alignment

author	boris (netbook) <bnagaev@gmail.com>
date	Tue, 26 Oct 2010 00:40:36 +0400
parents	f7dead025719
children	0c7f6117481b

line source

1 #!usr/bin/python

3 import sys

5 import project

6 import sequence

7 import monomer

8 import config

9 from graph import Graph

10 from Bio.PDB import Superimposer

11 from tempfile import NamedTemporaryFile

12 import os

13 from fasta import save_fasta

15 class Block(object):

16 """ Block of alignment

18 Mandatory data:

19 * self.project -- project object, which the block belongs to

20 * self.sequences - set of sequence objects that contain monomers

21 and/or gaps, that constitute the block

22 * self.positions -- sorted list of positions of the project.alignment that

23 are included in the block

25 Don't change self.sequences -- it may be a link to other block.sequences

27 How to create a new block:

28 >>> import project

29 >>> import block

30 >>> proj = project.Project(open("test.fasta"))

31 >>> block1 = block.Block(proj)

32 """

34 def __init__(self, project, sequences=None, positions=None):

35 """ Builds new block from project

37 if sequences==None, all sequences are used

38 if positions==None, all positions are used

39 """

40 if sequences == None:

41 sequences = set(project.sequences) # copy

42 if positions == None:

43 positions = range(len(project))

44 self.project = project

45 self.sequences = sequences

46 self.positions = positions

48 def save_fasta(self, out_file, long_line=70, gap='-'):

49 """ Saves alignment to given file in fasta-format

51 No changes in the names, descriptions or order of the sequences

52 are made.

53 """

54 for sequence in self.sequences:

55 alignment_monomers = self.project.alignment[sequence]

56 block_monomers = [alignment_monomers[i] for i in self.positions]

57 string = ''.join([m.type.code1 if m else '-' for m in block_monomers])

58 save_fasta(out_file, string, sequence.name, sequence.description, long_line)

60 def geometrical_cores(self, max_delta=config.delta,

61 timeout=config.timeout, minsize=config.minsize,

62 ac_new_atoms=config.ac_new_atoms,

63 ac_count=config.ac_count):

64 """ Returns length-sorted list of blocks, representing GCs

66 max_delta -- threshold of distance spreading

67 timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)

68 minsize -- min size of each core

69 ac_new_atoms -- min part or new atoms in new alternative core

70 current GC is compared with each of already selected GCs

71 if difference is less then ac_new_atoms, current GC is skipped

72 difference = part of new atoms in current core

73 ac_count -- max number of cores (including main core)

74 -1 means infinity

75 If more than one pdb chain for some sequence provided, consider all of them

76 cost is calculated as 1 / (delta + 1)

77 delta in [0, +inf) => cost in (0, 1]

78 """

79 nodes = self.positions

80 lines = {}

81 for i in self.positions:

82 for j in self.positions:

83 if i < j:

84 distances = []

85 for sequence in self.sequences:

86 for chain in sequence.pdb_chains:

87 m1 = self.project.alignment[sequence][i]

88 m2 = self.project.alignment[sequence][j]

89 if m1 and m2:

90 ca1 = m1.pdb_residues[chain]['CA']

91 ca2 = m2.pdb_residues[chain]['CA']

92 d = ca1 - ca2 # Bio.PDB feature

93 distances.append(d)

94 if len(distances) >= 2:

95 delta = max(distances) - min(distances)

96 if delta <= max_delta:

97 lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)

98 graph = Graph(nodes, lines)

99 cliques = graph.cliques(timeout=timeout, minsize=minsize)

100 GCs = []

101 for clique in cliques:

102 for GC in GCs:

103 if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):

104 break

105 else:

106 GCs.append(Block(self.project, self.sequences, clique))

107 if ac_count != -1 and len(GCs) >= ac_count:

108 break

109 return GCs

110

111 def xstring(self, x='X', gap='-'):

112 """ Returns string consisting of gap chars and chars x at self.positions

113

114 Length of returning string = length of project

115 """

116 monomers = [False] * len(self.project)

117 for i in self.positions:

118 monomers[i] = True

119 return ''.join([x if m else gap for m in monomers])

120

121 def save_xstring(self, out_file, name, description='', x='X', gap='-', long_line=70):

122 """ Save xstring and name in fasta format """

123 save_fasta(out_file, self.xstring(x=x, gap=gap), name, description, long_line)

124

125 def monomers(self, sequence):

126 """ Iterates monomers of this sequence from this block """

127 alignment_sequence = self.project.alignment[sequence]

128 return (alignment_sequence[i] for i in self.positions)

129

130 def ca_atoms(self, sequence, pdb_chain):

131 """ Iterates Ca-atom of monomers of this sequence from this block """

132 return (monomer.pdb_residues[pdb_chain] for monomer in self.monomers())

133

134 def sequences_chains(self):

135 """ Iterates pairs (sequence, chain) """

136 for sequence in self.project.sequences:

137 if sequence in self.sequences:

138 for chain in sequence.pdb_chains:

139 yield (sequence, chain)

140

141 def superimpose(self):

142 """ Superimpose all pdb_chains in this block """

143 sequences_chains = list(self.sequences_chains())

144 if len(sequences_chains) >= 1:

145 sup = Superimposer()

146 fixed_sequence, fixed_chain = sequences_chains.pop()

147 fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)

148 for sequence, chain in sequences_chains:

149 moving_atoms = self.ca_atoms(sequence, chain)

150 sup.set_atoms(fixed_atoms, moving_atoms)

151 # Apply rotation/translation to the moving atoms

152 sup.apply(moving_atoms)

153

154 def pdb_save(self, out_file):

155 """ Save all sequences

156

157 Returns {(sequence, chain): CHAIN}

158 CHAIN is chain letter in new file

159 """

160 tmp_file = NamedTemporaryFile(delete=False)

161 tmp_file.close()

162

163 for sequence, chain in self.sequences_chains():

164 sequence.pdb_save(tmp_file.name, chain)

165 # TODO: read from tmp_file.name

166 # change CHAIN

167 # add to out_file

168

169 os.unlink(NamedTemporaryFile)

170