allpy: f7dead025719 lib/block.py

allpy

view lib/block.py @ 150:f7dead025719

documentation improvements

author	boris (netbook) <bnagaev@gmail.com>
date	Mon, 25 Oct 2010 13:30:11 +0400
parents	85fc264975a2
children	675b402094be

line source

1 #!/usr/bin/python

3 import sys

5 import project

6 import sequence

7 import monomer

8 import config

9 from graph import Graph

10 from Bio.PDB import Superimposer

11 from tempfile import NamedTemporaryFile

12 import os

class Block(object):
    """ Block of alignment

    Mandatory data:
    * self.project -- project object, which the block belongs to
    * self.sequences -- set of sequence objects that contain monomers
      and/or gaps, that constitute the block
    * self.positions -- sorted list of positions of the project.alignment
      that are included in the block

    Don't change self.sequences -- it may be a link to other block.sequences

    How to create a new block:
    >>> import project
    >>> import block
    >>> proj = project.Project(open("test.fasta"))
    >>> block1 = block.Block(proj)
    """

    def __init__(self, project, sequences=None, positions=None):
        """ Builds new block from project

        project -- project object the block belongs to
        sequences -- sequences of the block; if None, a copy of the set of
            all project sequences is used
        positions -- alignment positions of the block; if None, all
            positions 0 .. len(project) - 1 are used
        """
        if sequences is None:
            sequences = set(project.sequences)  # copy
        if positions is None:
            # Materialized so that the attribute is a real list regardless
            # of what range() returns.
            positions = list(range(len(project)))
        self.project = project
        self.sequences = sequences
        self.positions = positions

    def save_fasta(self, out_file, long_line=60, gap='-'):
        """ Saves alignment to given file in fasta-format

        out_file -- object with a write() method
        long_line -- sequence lines are split to substrings of this length;
            to prevent splitting, set long_line=None
        gap -- character written for positions without a monomer

        No changes in the names, descriptions or order of the sequences
        are made.
        """
        for sequence in self.sequences:
            out_file.write(">%(name)s %(description)s \n" % sequence.__dict__)
            string = ''.join([m.type.code1 if m else gap
                              for m in self.monomers(sequence)])
            if long_line:
                # Step over chunk starts, so no empty trailing line is
                # written when the length is a multiple of long_line.
                for start in range(0, len(string), long_line):
                    out_file.write("%s \n" % string[start:start + long_line])
            else:
                out_file.write("%s \n" % string)

    def geometrical_cores(self, max_delta=config.delta,
                          timeout=config.timeout, minsize=config.minsize,
                          ac_new_atoms=config.ac_new_atoms,
                          ac_count=config.ac_count):
        """ Returns list of blocks, representing GCs

        The blocks are appended in the order graph.cliques() yields them
        (presumably length-sorted -- TODO confirm against Graph.cliques).

        max_delta -- threshold of distance spreading
        timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
        minsize -- min size of each core
        ac_new_atoms -- min part of new atoms in new alternative core
            current GC is compared with each of already selected GCs
            if difference is less than ac_new_atoms, current GC is skipped
            difference = part of new atoms in current core
        ac_count -- max number of cores (including main core)
            -1 means infinity

        If more than one pdb chain for some sequence provided, consider
        all of them.
        cost is calculated as 1 / (delta + 1)
            delta in [0, +inf) => cost in (0, 1]
        """
        nodes = self.positions
        lines = {}
        alignment = self.project.alignment
        for i in self.positions:
            for j in self.positions:
                if i >= j:
                    continue  # every unordered pair is visited once
                distances = []
                for sequence in self.sequences:
                    row = alignment[sequence]
                    m1 = row[i]
                    m2 = row[j]
                    if not (m1 and m2):
                        continue  # gap in at least one of the columns
                    for chain in sequence.pdb_chains:
                        ca1 = m1.pdb_residues[chain]['CA']
                        ca2 = m2.pdb_residues[chain]['CA']
                        distances.append(ca1 - ca2)  # Bio.PDB feature
                if len(distances) >= 2:
                    delta = max(distances) - min(distances)
                    if delta <= max_delta:
                        # Cost depends on the measured spread, not on the
                        # threshold: 1 / (delta + 1).
                        lines[Graph.line(i, j)] = 1.0 / (1.0 + delta)
        graph = Graph(nodes, lines)
        cliques = graph.cliques(timeout=timeout, minsize=minsize)
        cores = []
        for clique in cliques:
            too_similar = False
            for core in cores:
                new_atoms = len(clique - set(core.positions))
                if new_atoms < ac_new_atoms * len(clique):
                    too_similar = True
                    break
            if too_similar:
                continue
            # positions are stored as a sorted list, as the class requires
            cores.append(Block(self.project, self.sequences, sorted(clique)))
            if ac_count != -1 and len(cores) >= ac_count:
                break
        return cores

    def xstring(self, x='X', gap='-'):
        """ Returns string consisting of gap chars and chars x at self.positions

        Length of returning string = length of project
        """
        chars = [gap] * len(self.project)
        for i in self.positions:
            chars[i] = x
        return ''.join(chars)

    def save_xstring(self, out_file, name, description='', x='X', gap='-'):
        """ Save xstring and name in fasta format

        out_file -- object with a write() method
        name, description -- written to the fasta header line
        x, gap -- passed to self.xstring()
        """
        out_file.write(">%(name)s %(description)s \n" %
                       {'name': name, 'description': description})
        out_file.write("%(xstring)s \n" %
                       {'xstring': self.xstring(x=x, gap=gap)})

    def monomers(self, sequence):
        """ Iterates monomers of this sequence from this block

        Yields one item of project.alignment[sequence] per block position;
        the item is false (see save_fasta) where the sequence has a gap.
        """
        alignment_sequence = self.project.alignment[sequence]
        return (alignment_sequence[i] for i in self.positions)

    def ca_atoms(self, sequence, pdb_chain):
        """ Iterates Ca-atoms of monomers of this sequence from this block

        Gaps are skipped, so the number of atoms may be less than
        the number of positions of the block.
        """
        return (m.pdb_residues[pdb_chain]['CA']
                for m in self.monomers(sequence) if m)

    def sequences_chains(self):
        """ Iterates pairs (sequence, chain) """
        for sequence in self.sequences:
            for chain in sequence.pdb_chains:
                yield (sequence, chain)

    def superimpose(self):
        """ Superimpose all pdb_chains in this block

        One (sequence, chain) pair is taken as fixed, every other chain is
        fitted to it by Ca-atoms of the block columns where both sequences
        have monomers. The transformation is applied to Ca-atoms of the
        moving chain that belong to this block.
        """
        sequences_chains = list(self.sequences_chains())
        if not sequences_chains:
            return
        fixed_sequence, fixed_chain = sequences_chains.pop()
        # A list, because it is traversed once per moving chain.
        fixed_monomers = list(self.monomers(fixed_sequence))
        sup = Superimposer()
        for sequence, chain in sequences_chains:
            fixed_atoms = []
            moving_atoms = []
            pairs = zip(fixed_monomers, self.monomers(sequence))
            for fixed_monomer, moving_monomer in pairs:
                if fixed_monomer and moving_monomer:
                    fixed_atoms.append(
                        fixed_monomer.pdb_residues[fixed_chain]['CA'])
                    moving_atoms.append(
                        moving_monomer.pdb_residues[chain]['CA'])
            if not fixed_atoms:
                continue  # no common columns, nothing to fit
            # Superimposer needs two atom lists of equal length.
            sup.set_atoms(fixed_atoms, moving_atoms)
            # Apply rotation/translation to the moving atoms
            sup.apply(list(self.ca_atoms(sequence, chain)))

    def pdb_save(self, out_file):
        """ Save all sequences

        Intended to return {(sequence, chain): CHAIN}, where CHAIN is chain
        letter in new file. This is not implemented yet (see TODO below):
        each chain is only saved to a temporary file, nothing is written
        to out_file and None is returned.
        """
        tmp_file = NamedTemporaryFile(delete=False)
        tmp_file.close()
        try:
            for sequence, chain in self.sequences_chains():
                sequence.pdb_save(tmp_file.name, chain)
                # TODO: read from tmp_file.name
                # change CHAIN
                # add to out_file
        finally:
            # Remove the temporary file even if saving of a chain failed.
            os.unlink(tmp_file.name)

179

180