allpy: 53800c7e3458 allpy/markups.py

allpy

view allpy/markups.py @ 888:53800c7e3458

add test for secondary structure markup for sequence Module protein_pdb was copied from blocks3d/protein_pdb.py. Sequence is created from string, pdb is loaded, ss-markup is applied. Then secondary structure is checked to be entirely alpha-helix. see #120

author	Boris Nagaev <bnagaev@gmail.com>
date	Sun, 18 Sep 2011 15:11:49 +0400
parents	077e5e18a600
children	6134ae1dfdae

line source

1 import os

2 from tempfile import NamedTemporaryFile

4 from Bio.PDB import DSSP

6 import base

8 by_name = {}

9 """A dictionary of default markup name -> markup class."""

11 def update(*args):

12 """Update `by_name` dictionary.

14 If any arguments are given, add them to markups namespace beforehands.

15 """

16 # Add user classes if necessary

17 for markup_class in args:

18 class_name = markup_class.__name__

19 assert class_name not in globals(), "SameNamed markup already exists!"

20 globals()[class_name] = markup_class

21 # Update `by_name` dictonary

22 global by_name

23 by_name = {}

24 for markup_class in globals().values():

25 if hasattr(markup_class, 'name') and hasattr(markup_class, 'kind'):

26 fullname = markup_class.kind, markup_class.name

27 assert fullname not in by_name, "Samenamed markup already exists!"

28 by_name[fullname] = markup_class

30 class MarkupIOMixin(base.Markup):

31 """Standard helper mixin for creating saveable markups."""

33 separator = ','

34 """Separator to use when saving/loading markup."""

36 io_class = None

37 """MUST be overloaded when subclassing. io_class in file."""

39 @staticmethod

40 def parse_item(key, value):

41 """Deserialize one item of markup. Overload when subclassing."""

42 return value

44 @staticmethod

45 def repr_item(key, value):

46 """Serialize one item of markup. Overload when subclassing."""

47 return str(value)

49 @classmethod

50 def from_record(cls, container, record, name=None):

51 """Read markup from semi-parsed record from 'markup' file."""

52 assert record['io_class'] == cls.io_class

53 separator = record.get('separator', cls.separator)

54 values = record['markup'].split(separator)

55 result = container.add_markup(name, markup_class=cls)

56 assert len(values) == len(result.sorted_keys())

57 for key, value in zip(result.sorted_keys(), values):

58 if value:

59 result[key] = cls.parse_item(key, value)

60 return result

62 def to_record(self):

63 """Write markup to semi-serialized record for 'markup' file."""

64 values = []

65 for key in self.sorted_keys():

66 if key in self:

67 values.append(self.repr_item(key, self[key]))

68 else:

69 values.append('')

70 markup = self.separator.join(values)

71 return {

72 'markup': markup,

73 'io_class': self.io_class,

74 'separator': self.separator,

75 }

77 class IntMarkupMixin(MarkupIOMixin):

78 """Markup that has integer values."""

80 io_class = 'IntMarkup'

82 @staticmethod

83 def parse_item(key, value):

84 return int(value)

86 class SequenceNumberMarkup(base.SequenceMarkup):

88 name = 'number'

90 def refresh(self):

91 for number, monomer in enumerate(self.sequence, 1):

92 monomer.number = number

94 class SequenceIndexMarkup(base.SequenceMarkup):

96 name = 'index'

98 def refresh(self):

99 for index, monomer in enumerate(self.sequence):

100 monomer.index = index

101

102 class AlignmentNumberMarkup(base.AlignmentMarkup):

103

104 name = 'number'

105

106 def refresh(self):

107 for number, column in enumerate(self.alignment.columns, 1):

108 self[column] = number

109

110 class AlignmentIndexMarkup(base.AlignmentMarkup):

111

112 name = 'index'

113

114 def refresh(self):

115 for index, column in enumerate(self.alignment.columns):

116 self[column] = index

117

118 class SequenceCaseMarkup(base.SequenceMarkup, MarkupIOMixin):

119

120 name = 'case'

121 io_class = 'SequenceCaseMarkup'

122

123 def refresh(self):

124 for monomer in self.sequence:

125 if monomer.input_code1.isupper():

126 monomer.case = 'upper'

127 elif monomer.input_code1.islower():

128 monomer.case = 'lower'

129

130 @staticmethod

131 def parse_value(monomer, value):

132 assert mnomer.code1 == value.upper()

133 if value.isupper():

134 return 'upper'

135 if value.islower():

136 return 'lower'

137

138 @staticmethod

139 def repr_value(monomer, value):

140 if monomer.case == 'upper':

141 return monomer.code1.upper()

142 if monomer.case == 'lower':

143 return monomer.code1.lower()

144 raise AssertionError("Unknown monomer case")

145

146 class SequencePdbResiMarkup(base.SequenceMarkup, IntMarkupMixin):

147 name = 'pdb_resi'

148

149 def from_pdb(self):

150 for monomer in self.sequence:

151 try:

152 monomer.pdb_resi = monomer.pdb_residue.id[1]

153 except Exception:

154 pass

155

156 def add_pdb(self, download_pdb=None, xyz_only=False):

157 import structure

158 if download_pdb is None:

159 download_pdb = structure.cached_download_pdb

160

161 match = structure.pdb_id_parse(self.sequence.name)

162 code, model , chain = match['code'], match['model'], match['chain']

163 pdb_file = download_pdb(code)

164 pdb_structure = structure.get_structure(pdb_file, self.sequence.name)

165 pdb_chain = pdb_structure[0][chain]

166 if not xyz_only:

167 self.sequence.pdb_chain = pdb_chain

168 for monomer in self.sequence:

169 if monomer in self:

170 pdb_residue = pdb_chain[' ', monomer.pdb_resi, ' ']

171 monomer.ca_xyz = pdb_residue['CA'].get_vector()

172 if not xyz_only:

173 monomer.pdb_residue = pdb_residue

174

175 class SequenceSecondaryStructureMarkup(base.SequenceMarkup, MarkupIOMixin):

176 """ Secondary structure markup for sequence.

177

178 Depends on dsspcmbi program.

179 Sequence should be structure.SequenceMixin, pdb should be loaded.

180 Note that DSSP cannot handle mutiple models!

181 Note that dssp executable name is hardcoded (=dsspcmbi).

182

183 Notation:

184 * H -- alpha-helix

185 * B -- Isolated beta-bridge residue

186 * E -- Strand

187 * G -- 3-10 helix

188 * I -- pi-helix

189 * T -- Turn

190 * S -- Bend

191 * - -- Other

192 """

193

194 name = 'ss'

195 io_class = 'SequenceSecondaryStructureMarkup'

196

197 def refresh(self):

198 chain = self.sequence.pdb_chain

199 model = chain.get_parent()

200 pdb_file = NamedTemporaryFile(delete=False)

201 self.sequence.save_pdb(pdb_file)

202 pdb_file.close()

203 dssp=DSSP(model, pdb_file.name, dssp='dsspcmbi')

204 for monomer in self.sequence:

205 monomer.ss = dssp[(chain.get_id(), monomer.pdb_residue.get_id())][1]

206 os.unlink(pdb_file.name)

207

208 # This MUST be the last statement in this module.

209 update()

210

211 # vim: set ts=4 sts=4 sw=4 et: