allpy: 0dec37632d10 allpy/markups.py

allpy

view allpy/markups.py @ 954:0dec37632d10

blocks3d/wt: add reference to the Russian Foundation for Basic Research

author	Boris Nagaev <bnagaev@gmail.com>
date	Tue, 24 Jan 2012 22:59:54 +0400
parents	7ebba94eece2
children	695cf9ab2293

line source

1 import os

2 from tempfile import NamedTemporaryFile

4 from Bio.PDB import DSSP

6 import base

by_name = {}
"""A dictionary of default markup name -> markup class."""


def update(*args):
    """Update `by_name` dictionary.

    If any arguments are given, add them to markups namespace beforehand.

    The registry is rebuilt from every module-level object that has both a
    `name` and a `kind` attribute; entries are keyed by `(kind, name)`.

    All checks are performed before anything is modified, so a failing call
    leaves both the module namespace and the previous `by_name` untouched.

    Raises AssertionError if a user class shares its `__name__` with an
    existing module-level name (or with another argument), or if two markup
    classes share the same `(kind, name)` pair.  The error is raised
    explicitly, so the check also works when Python runs with -O.
    """
    global by_name
    namespace = globals()

    # Collect user classes without touching the module namespace yet.
    pending = {}
    for markup_class in args:
        class_name = markup_class.__name__
        if class_name in namespace or class_name in pending:
            raise AssertionError("SameNamed markup already exists!")
        pending[class_name] = markup_class

    # Build the new `by_name` dictionary on the side.
    registry = {}
    candidates = list(namespace.values()) + list(pending.values())
    for markup_class in candidates:
        if hasattr(markup_class, 'name') and hasattr(markup_class, 'kind'):
            fullname = markup_class.kind, markup_class.name
            if fullname in registry:
                raise AssertionError("Samenamed markup already exists!")
            registry[fullname] = markup_class

    # Everything validated: commit both changes together.
    namespace.update(pending)
    by_name = registry

class MarkupIOMixin(base.Markup):
    """Helper mixin giving a markup record-based save/load support.

    Subclasses set `io_class` and may override `parse_item`/`repr_item`
    to control how a single value is read from or written to text.
    """

    separator = ','
    """Separator to use when saving/loading markup."""

    quotes = ''
    """Quotation sign used on the markup."""

    io_class = None
    """MUST be overloaded when subclassing. io_class in file."""

    @staticmethod
    def parse_item(key, value):
        """Turn one textual item into a markup value (identity by default)."""
        return value

    @staticmethod
    def repr_item(key, value):
        """Turn one markup value into text (`str(value)` by default)."""
        return str(value)

    @classmethod
    def from_record(cls, container, record, name=None):
        """Create a markup on `container` from a semi-parsed record.

        `record` must carry 'io_class' equal to `cls.io_class` and a
        'markup' string; 'separator' and 'quotes' are optional and fall
        back to the class attributes.  Empty items are left unset.
        Returns the markup created by `container.add_markup`.
        """
        assert record['io_class'] == cls.io_class
        item_separator = record.get('separator', cls.separator)
        quote_chars = record.get('quotes', cls.quotes)
        raw_items = record['markup'].strip(quote_chars)
        if item_separator:
            raw_items = raw_items.split(item_separator)
        # With an empty separator the string itself is iterated below,
        # i.e. every character is one item.
        markup = container.add_markup(name, markup_class=cls)
        assert len(raw_items) == len(markup.sorted_keys())
        for key, raw in zip(markup.sorted_keys(), raw_items):
            if not raw:
                continue
            markup[key] = cls.parse_item(key, raw)
        return markup

    def to_record(self):
        """Return a semi-serialized record (dict) describing this markup.

        Keys absent from the markup are written as empty strings.
        """
        def serialize(key):
            if key in self:
                return self.repr_item(key, self[key])
            return ''

        pieces = [serialize(key) for key in self.sorted_keys()]
        return {
            'markup': self.separator.join(pieces),
            'io_class': self.io_class,
            'separator': self.separator,
            'quotes': self.quotes,
        }

class IntMarkupMixin(MarkupIOMixin):
    """Saveable markup whose items are read back as integers."""

    io_class = 'IntMarkup'

    @staticmethod
    def parse_item(key, value):
        """Convert the textual `value` with `int`; `key` is not used."""
        number = int(value)
        return number

class SequenceNumberMarkup(base.SequenceMarkup):
    """Sequence markup 'number': position of each monomer, counted from 1."""

    name = 'number'

    def refresh(self):
        """Store 1, 2, 3, ... in `monomer.number` along the sequence."""
        position = 0
        for monomer in self.sequence:
            position += 1
            monomer.number = position

100

class SequenceIndexMarkup(base.SequenceMarkup):
    """Sequence markup 'index': position of each monomer, counted from 0."""

    name = 'index'

    def refresh(self):
        """Store 0, 1, 2, ... in `monomer.index` along the sequence."""
        position = 0
        for monomer in self.sequence:
            monomer.index = position
            position += 1

108

class AlignmentNumberMarkup(base.AlignmentMarkup):
    """Alignment markup 'number': position of each column, counted from 1."""

    name = 'number'

    def refresh(self):
        """Map every alignment column to its 1-based position."""
        columns = self.alignment.columns
        for offset, column in enumerate(columns):
            self[column] = offset + 1

116

class AlignmentIndexMarkup(base.AlignmentMarkup):
    """Alignment markup 'index': position of each column, counted from 0."""

    name = 'index'

    def refresh(self):
        """Map every alignment column to its 0-based position."""
        position = 0
        for column in self.alignment.columns:
            self[column] = position
            position += 1

124

class SequenceCaseMarkup(base.SequenceMarkup, MarkupIOMixin):
    """Sequence markup 'case': whether a monomer was given in upper or
    lower case.

    Saved as an unseparated string of one-letter codes, quoted with "'".
    """

    name = 'case'
    io_class = 'SequenceCaseMarkup'
    separator = ''
    quotes = "'"

    def refresh(self):
        """Set `monomer.case` from the case of `monomer.input_code1`.

        Monomers whose input code is neither upper nor lower case are
        left untouched.
        """
        for monomer in self.sequence:
            letter = monomer.input_code1
            if letter.isupper():
                monomer.case = 'upper'
            elif letter.islower():
                monomer.case = 'lower'

    @staticmethod
    def parse_item(monomer, value):
        """Return 'upper' or 'lower' for the saved letter `value`.

        The letter must match `monomer.code1` ignoring case.  Returns
        None when `value` is neither upper nor lower case.
        """
        assert monomer.code1 == value.upper()
        if value.isupper():
            return 'upper'
        return 'lower' if value.islower() else None

    @staticmethod
    def repr_item(monomer, value):
        """Return `monomer.code1` in the case recorded in `monomer.case`.

        `value` is not consulted.  Raises AssertionError for any other
        case value.
        """
        case = monomer.case
        if case == 'upper':
            return monomer.code1.upper()
        elif case == 'lower':
            return monomer.code1.lower()
        else:
            raise AssertionError("Unknown monomer case")

154

class SequenceQualityMarkup(base.SequenceMarkup, IntMarkupMixin):
    """Sequence markup 'quality' with integer values."""

    name = 'quality'

157

class SequencePdbResiMarkup(base.SequenceMarkup, IntMarkupMixin):
    """Sequence markup 'pdb_resi': integer PDB residue number per monomer."""

    name = 'pdb_resi'

    def from_pdb(self):
        """Fill `monomer.pdb_resi` from `monomer.pdb_residue.id[1]`.

        Best effort: a monomer for which this lookup fails is skipped.
        """
        for monomer in self.sequence:
            try:
                residue_id = monomer.pdb_residue.id
                monomer.pdb_resi = residue_id[1]
            except Exception:
                continue

    def add_pdb(self, download_pdb=None, xyz_only=False):
        """Attach PDB data to the marked monomers using their `pdb_resi`.

        `download_pdb` is called with the PDB code parsed from the
        sequence name and its result is handed to
        `structure.get_structure`; it defaults to
        `structure.cached_download_pdb`.

        Every monomer present in this markup gets `ca_xyz` (vector of its
        'CA' atom).  Unless `xyz_only` is true, the chain is also stored
        in `sequence.pdb_chain` and the residue in `monomer.pdb_residue`.
        """
        import structure
        if download_pdb is None:
            download_pdb = structure.cached_download_pdb

        sequence = self.sequence
        parsed = structure.pdb_id_parse(sequence.name)
        # NOTE(review): `model` is parsed but never used -- the first model
        # (index 0) is always taken below.  Confirm this is intended.
        code, model, chain_id = parsed['code'], parsed['model'], parsed['chain']
        pdb_file = download_pdb(code)
        pdb_structure = structure.get_structure(pdb_file, sequence.name)
        pdb_chain = pdb_structure[0][chain_id]

        keep_links = not xyz_only
        if keep_links:
            sequence.pdb_chain = pdb_chain
        for monomer in sequence:
            if monomer not in self:
                continue
            # Residues are looked up with blank hetero flag and insertion code.
            pdb_residue = pdb_chain[' ', monomer.pdb_resi, ' ']
            monomer.ca_xyz = pdb_residue['CA'].get_vector()
            if keep_links:
                monomer.pdb_residue = pdb_residue

186

class SequenceSecondaryStructureMarkup(base.SequenceMarkup, MarkupIOMixin):
    """ Secondary structure markup for sequence.

    Depends on dsspcmbi program.
    Sequence should be structure.SequenceMixin, pdb should be loaded.
    Note that DSSP cannot handle multiple models!
    Note that dssp executable name is hardcoded (=dsspcmbi).

    Notation:
     * H -- alpha-helix
     * B -- Isolated beta-bridge residue
     * E -- Strand
     * G -- 3-10 helix
     * I -- pi-helix
     * T -- Turn
     * S -- Bend
     * - -- Other
    """

    name = 'ss'
    io_class = 'SequenceSecondaryStructureMarkup'

    def refresh(self):
        """Run DSSP on the sequence's PDB data and fill `monomer.ss`.

        The sequence is saved to a temporary PDB file which is handed to
        DSSP together with the model owning `sequence.pdb_chain`.  A
        monomer whose DSSP entry cannot be looked up gets '?'.

        The temporary file is removed even if saving or DSSP fails.
        """
        chain = self.sequence.pdb_chain
        model = chain.get_parent()
        # delete=False: the file must outlive close() so that the external
        # DSSP program can read it by name; it is unlinked explicitly below.
        pdb_file = NamedTemporaryFile(delete=False)
        try:
            try:
                self.sequence.save_pdb(pdb_file)
            finally:
                pdb_file.close()
            dssp = DSSP(model, pdb_file.name, dssp='dsspcmbi')
            for monomer in self.sequence:
                try:
                    residue_key = (chain.get_id(), monomer.pdb_residue.get_id())
                    monomer.ss = dssp[residue_key][1]
                except Exception:
                    # Not a bare except: KeyboardInterrupt and SystemExit
                    # must still propagate.
                    monomer.ss = '?' # FIXME
        finally:
            os.unlink(pdb_file.name)

222

# This MUST be the last statement in this module: update() scans the module
# globals, so every markup class has to be defined before it runs in order
# to be registered in `by_name`.
update()

225

226 # vim: set ts=4 sts=4 sw=4 et: