allpy: 106880c04197 allpy/markups.py

allpy

view allpy/markups.py @ 970:106880c04197

More sensible error message for _append_columns on an empty Block

author	Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date	Sat, 25 Feb 2012 18:44:05 +0400
parents	b8ddd35fac65
children	5fb50c0452dc 08d892230e8c

line source

1 import os

2 from tempfile import NamedTemporaryFile

4 from Bio.PDB import DSSP

6 import base

by_name = {}
"""Registry of default markups: maps ``(kind, name)`` to the markup class."""

def update(*args):
    """Rebuild the `by_name` dictionary from this module's namespace.

    If any arguments are given, they are first added to the module
    namespace under their ``__name__``; a name that is already taken
    trips an assertion.

    Afterwards `by_name` is rebound to a fresh dictionary that maps
    ``(kind, name)`` to every module-level object that has both a `name`
    and a `kind` attribute.  Two objects sharing the same ``(kind, name)``
    pair trip an assertion.
    """
    global by_name
    namespace = globals()

    # Register user-supplied classes, refusing to shadow existing names.
    for user_class in args:
        key = user_class.__name__
        assert key not in namespace, "SameNamed markup already exists!"
        namespace[key] = user_class

    # Rebuild the `by_name` dictionary from scratch.
    by_name = {}
    for candidate in namespace.values():
        if not (hasattr(candidate, 'name') and hasattr(candidate, 'kind')):
            continue
        fullname = candidate.kind, candidate.name
        assert fullname not in by_name, "Samenamed markup already exists!"
        by_name[fullname] = candidate

class MarkupIOMixin(base.Markup):
    """Standard helper mixin for creating saveable markups.

    Provides `from_record` / `to_record`, which convert between a markup
    and a dictionary record with the keys 'markup', 'io_class',
    'separator' and 'quotes'.  Subclasses customize the per-item
    conversion through `parse_item` and `repr_item`.
    """

    separator = ','
    """Separator to use between items when saving/loading markup."""

    quotes = ''
    """Quotation characters stripped from both ends of the markup on load."""

    io_class = None
    """MUST be overloaded when subclassing: the io_class tag of the record."""

    @staticmethod
    def parse_item(key, value):
        """Deserialize one item of markup. Overload when subclassing.

        The default implementation returns `value` unchanged.
        """
        return value

    @staticmethod
    def repr_item(key, value):
        """Serialize one item of markup. Overload when subclassing.

        The default implementation returns ``str(value)``.
        """
        return str(value)

    @classmethod
    def from_record(cls, container, record, name=None):
        """Read markup from semi-parsed record from 'markup' file.

        `record` must carry an 'io_class' equal to ``cls.io_class`` and a
        'markup' string; 'separator' and 'quotes' are optional and fall
        back to the class attributes.  The markup is created with
        ``container.add_markup(name, markup_class=cls)`` and returned.

        Empty items are skipped, so the corresponding keys stay unset.
        The number of items must equal the number of sorted keys of the
        new markup, otherwise an assertion fails.
        """
        assert record['io_class'] == cls.io_class
        item_separator = record.get('separator', cls.separator)
        quote_chars = record.get('quotes', cls.quotes)
        serialized = record['markup'].strip(quote_chars)
        # With an empty separator the string itself is the item sequence
        # (one character per item).
        items = serialized.split(item_separator) if item_separator else serialized
        markup = container.add_markup(name, markup_class=cls)
        keys = markup.sorted_keys()
        assert len(items) == len(keys)
        for key, item in zip(keys, items):
            if not item:
                continue
            markup[key] = cls.parse_item(key, item)
        return markup

    def to_record(self):
        """Write markup to semi-serialized record for 'markup' file.

        Returns a dictionary with the joined item representations under
        'markup' plus this markup's 'io_class', 'separator' and 'quotes'.
        Keys without a value contribute an empty string.
        """
        items = [
            self.repr_item(key, self[key]) if key in self else ''
            for key in self.sorted_keys()
        ]
        return {
            'markup': self.separator.join(items),
            'io_class': self.io_class,
            'separator': self.separator,
            'quotes': self.quotes,
        }

class IntMarkupMixin(MarkupIOMixin):
    """Markup that has integer values."""

    io_class = 'IntMarkup'

    @staticmethod
    def parse_item(key, value):
        """Deserialize one item by converting `value` with ``int``."""
        return int(value)

class CharMarkupMixin(MarkupIOMixin):
    """Markup that has one-letter values."""

    io_class = 'CharMarkup'

    @staticmethod
    def parse_item(key, value):
        """Return `value` unchanged after asserting it has length 1."""
        assert len(value) == 1
        return value

    @staticmethod
    def repr_item(key, value):
        """Return ``str(value)`` after asserting it has length 1."""
        text = str(value)
        assert len(text) == 1
        return text

107

class SequenceCharMarkup(base.SequenceMarkup, CharMarkupMixin):
    """Generic sequence markup with one-letter values, named 'char'."""

    name = 'char'

112

class SequenceNumberMarkup(base.SequenceMarkup):
    """Sequence markup 'number': 1-based position of each monomer."""

    name = 'number'

    def refresh(self):
        """Set ``monomer.number`` to 1, 2, 3, ... along the sequence."""
        for position, monomer in enumerate(self.sequence, 1):
            monomer.number = position

120

class SequenceIndexMarkup(base.SequenceMarkup):
    """Sequence markup 'index': 0-based position of each monomer."""

    name = 'index'

    def refresh(self):
        """Set ``monomer.index`` to 0, 1, 2, ... along the sequence."""
        for position, monomer in enumerate(self.sequence):
            monomer.index = position

128

class AlignmentNumberMarkup(base.AlignmentMarkup):
    """Alignment markup 'number': 1-based position of each column."""

    name = 'number'

    def refresh(self):
        """Store 1, 2, 3, ... in this markup for the alignment's columns."""
        for position, column in enumerate(self.alignment.columns, 1):
            self[column] = position

136

class AlignmentIndexMarkup(base.AlignmentMarkup):
    """Alignment markup 'index': 0-based position of each column."""

    name = 'index'

    def refresh(self):
        """Store 0, 1, 2, ... in this markup for the alignment's columns."""
        for position, column in enumerate(self.alignment.columns):
            self[column] = position

144

class SequenceCaseMarkup(base.SequenceMarkup, MarkupIOMixin):
    """Sequence markup 'case': letter case ('upper' / 'lower') of monomers.

    Serialized without a separator, one letter per monomer, using the
    single quote as the quotation character.
    """

    name = 'case'
    io_class = 'SequenceCaseMarkup'
    separator = ''
    quotes = "'"

    def refresh(self):
        """Set ``monomer.case`` from the case of ``monomer.input_code1``.

        Monomers whose input code is neither upper- nor lowercase are
        left untouched.
        """
        for monomer in self.sequence:
            letter = monomer.input_code1
            if letter.isupper():
                monomer.case = 'upper'
            elif letter.islower():
                monomer.case = 'lower'

    @staticmethod
    def parse_item(monomer, value):
        """Deserialize one letter into 'upper' or 'lower'.

        Asserts that the uppercased letter equals ``monomer.code1``.
        Returns None when the letter is neither upper- nor lowercase.
        """
        assert monomer.code1 == value.upper()
        if value.isupper():
            return 'upper'
        return 'lower' if value.islower() else None

    @staticmethod
    def repr_item(monomer, value):
        """Serialize one monomer as its code letter in the recorded case.

        The case is read from ``monomer.case``; `value` is not consulted.
        Raises AssertionError for any case other than 'upper' or 'lower'.
        """
        case = monomer.case
        if case == 'upper':
            return monomer.code1.upper()
        if case == 'lower':
            return monomer.code1.lower()
        raise AssertionError("Unknown monomer case")

174

class SequenceQualityMarkup(base.SequenceMarkup, IntMarkupMixin):
    """Sequence markup 'quality' with integer values."""

    name = 'quality'

177

class SequencePdbResiMarkup(base.SequenceMarkup, IntMarkupMixin):
    """Sequence markup 'pdb_resi': integer PDB residue number per monomer."""

    name = 'pdb_resi'

    def from_pdb(self):
        """Fill ``monomer.pdb_resi`` from ``monomer.pdb_residue.id[1]``.

        Best effort: a monomer for which this fails with any Exception
        is silently skipped.
        """
        for monomer in self.sequence:
            try:
                residue_id = monomer.pdb_residue.id
                monomer.pdb_resi = residue_id[1]
            except Exception:
                pass

    def add_pdb(self, download_pdb=None, xyz_only=False):
        """Attach PDB structure data to the sequence using this markup.

        The sequence name is parsed with ``structure.pdb_id_parse``; the
        PDB file is obtained via `download_pdb` (default:
        ``structure.cached_download_pdb``).  Each monomer contained in
        this markup gets ``ca_xyz`` set from the CA atom of the residue
        numbered ``monomer.pdb_resi``.  Unless `xyz_only` is true, the
        chain is also stored as ``sequence.pdb_chain`` and the residue
        as ``monomer.pdb_residue``.
        """
        import structure
        if download_pdb is None:
            download_pdb = structure.cached_download_pdb

        sequence = self.sequence
        match = structure.pdb_id_parse(sequence.name)
        code, model, chain = match['code'], match['model'], match['chain']
        # NOTE(review): `model` is parsed but unused; model 0 is always
        # taken below -- confirm this is intended.
        pdb_file = download_pdb(code)
        pdb_structure = structure.get_structure(pdb_file, sequence.name)
        pdb_chain = pdb_structure[0][chain]
        keep_structure = not xyz_only
        if keep_structure:
            sequence.pdb_chain = pdb_chain
        for monomer in sequence:
            if monomer not in self:
                continue
            pdb_residue = pdb_chain[' ', monomer.pdb_resi, ' ']
            monomer.ca_xyz = pdb_residue['CA'].get_vector()
            if keep_structure:
                monomer.pdb_residue = pdb_residue

206

class SequenceSecondaryStructureMarkup(base.SequenceMarkup, MarkupIOMixin):
    """ Secondary structure markup for sequence.

    Depends on dsspcmbi program.
    Sequence should be structure.SequenceMixin, pdb should be loaded.
    Note that DSSP cannot handle multiple models!
    Note that dssp executable name is hardcoded (=dsspcmbi).

    Notation:
    * H -- alpha-helix
    * B -- Isolated beta-bridge residue
    * E -- Strand
    * G -- 3-10 helix
    * I -- pi-helix
    * T -- Turn
    * S -- Bend
    * - -- Other
    """

    name = 'ss'
    io_class = 'SequenceSecondaryStructureMarkup'

    def refresh(self):
        """Compute ``monomer.ss`` for every monomer by running DSSP.

        The sequence is saved to a temporary PDB file, DSSP is run on it
        with the model that owns ``self.sequence.pdb_chain``, and each
        monomer gets the secondary structure code of its residue.
        Monomers for which the lookup fails get '?'.

        The temporary file is removed even when saving the PDB, running
        DSSP or the per-monomer loop raises.
        """
        chain = self.sequence.pdb_chain
        model = chain.get_parent()
        # delete=False: the file must outlive close() so DSSP can read it
        # by name; it is removed explicitly in the `finally` below.
        pdb_file = NamedTemporaryFile(delete=False)
        try:
            with pdb_file:
                self.sequence.save_pdb(pdb_file)
            dssp = DSSP(model, pdb_file.name, dssp='dsspcmbi')
            for monomer in self.sequence:
                try:
                    residue_key = (chain.get_id(), monomer.pdb_residue.get_id())
                    monomer.ss = dssp[residue_key][1]
                except Exception:
                    # Best effort: no DSSP data for this monomer.
                    monomer.ss = '?' # FIXME
        finally:
            os.unlink(pdb_file.name)

242

# Build the `by_name` registry from everything defined in this module.
# This MUST be the last statement in this module.
update()

245

246 # vim: set ts=4 sts=4 sw=4 et: