view allpy/markups.py @ 888:53800c7e3458
add test for secondary structure markup for sequence
Module protein_pdb was copied from blocks3d/protein_pdb.py.
Sequence is created from string, pdb is loaded, ss-markup is applied.
Then secondary structure is checked to be entirely alpha-helix.
see #120
author |
Boris Nagaev <bnagaev@gmail.com> |
date |
Sun, 18 Sep 2011 15:11:49 +0400 |
parents |
077e5e18a600 |
children |
6134ae1dfdae |
line source
2 from tempfile import NamedTemporaryFile
4 from Bio.PDB import DSSP
9 """A dictionary of default markup name -> markup class."""
12 """Update `by_name` dictionary.
14 If any arguments are given, add them to markups namespace beforehands.
16 # Add user classes if necessary
17 for markup_class in args:
18 class_name = markup_class.__name__
19 assert class_name not in globals(), "SameNamed markup already exists!"
20 globals()[class_name] = markup_class
21 # Update `by_name` dictonary
24 for markup_class in globals().values():
25 if hasattr(markup_class, 'name') and hasattr(markup_class, 'kind'):
26 fullname = markup_class.kind, markup_class.name
27 assert fullname not in by_name, "Samenamed markup already exists!"
28 by_name[fullname] = markup_class
30 class MarkupIOMixin(base.Markup):
31 """Standard helper mixin for creating saveable markups."""
34 """Separator to use when saving/loading markup."""
37 """MUST be overloaded when subclassing. io_class in file."""
40 def parse_item(key, value):
41 """Deserialize one item of markup. Overload when subclassing."""
45 def repr_item(key, value):
46 """Serialize one item of markup. Overload when subclassing."""
50 def from_record(cls, container, record, name=None):
51 """Read markup from semi-parsed record from 'markup' file."""
52 assert record['io_class'] == cls.io_class
53 separator = record.get('separator', cls.separator)
54 values = record['markup'].split(separator)
55 result = container.add_markup(name, markup_class=cls)
56 assert len(values) == len(result.sorted_keys())
57 for key, value in zip(result.sorted_keys(), values):
59 result[key] = cls.parse_item(key, value)
63 """Write markup to semi-serialized record for 'markup' file."""
65 for key in self.sorted_keys():
67 values.append(self.repr_item(key, self[key]))
70 markup = self.separator.join(values)
73 'io_class': self.io_class,
74 'separator': self.separator,
77 class IntMarkupMixin(MarkupIOMixin):
78 """Markup that has integer values."""
80 io_class = 'IntMarkup'
83 def parse_item(key, value):
86 class SequenceNumberMarkup(base.SequenceMarkup):
91 for number, monomer in enumerate(self.sequence, 1):
92 monomer.number = number
94 class SequenceIndexMarkup(base.SequenceMarkup):
99 for index, monomer in enumerate(self.sequence):
100 monomer.index = index
102 class AlignmentNumberMarkup(base.AlignmentMarkup):
107 for number, column in enumerate(self.alignment.columns, 1):
108 self[column] = number
110 class AlignmentIndexMarkup(base.AlignmentMarkup):
115 for index, column in enumerate(self.alignment.columns):
118 class SequenceCaseMarkup(base.SequenceMarkup, MarkupIOMixin):
121 io_class = 'SequenceCaseMarkup'
124 for monomer in self.sequence:
125 if monomer.input_code1.isupper():
126 monomer.case = 'upper'
127 elif monomer.input_code1.islower():
128 monomer.case = 'lower'
131 def parse_value(monomer, value):
132 assert mnomer.code1 == value.upper()
139 def repr_value(monomer, value):
140 if monomer.case == 'upper':
141 return monomer.code1.upper()
142 if monomer.case == 'lower':
143 return monomer.code1.lower()
144 raise AssertionError("Unknown monomer case")
146 class SequencePdbResiMarkup(base.SequenceMarkup, IntMarkupMixin):
150 for monomer in self.sequence:
152 monomer.pdb_resi = monomer.pdb_residue.id[1]
156 def add_pdb(self, download_pdb=None, xyz_only=False):
158 if download_pdb is None:
159 download_pdb = structure.cached_download_pdb
161 match = structure.pdb_id_parse(self.sequence.name)
162 code, model , chain = match['code'], match['model'], match['chain']
163 pdb_file = download_pdb(code)
164 pdb_structure = structure.get_structure(pdb_file, self.sequence.name)
165 pdb_chain = pdb_structure[0][chain]
167 self.sequence.pdb_chain = pdb_chain
168 for monomer in self.sequence:
170 pdb_residue = pdb_chain[' ', monomer.pdb_resi, ' ']
171 monomer.ca_xyz = pdb_residue['CA'].get_vector()
173 monomer.pdb_residue = pdb_residue
175 class SequenceSecondaryStructureMarkup(base.SequenceMarkup, MarkupIOMixin):
176 """ Secondary structure markup for sequence.
178 Depends on dsspcmbi program.
179 Sequence should be structure.SequenceMixin, pdb should be loaded.
180 Note that DSSP cannot handle mutiple models!
181 Note that dssp executable name is hardcoded (=dsspcmbi).
185 * B -- Isolated beta-bridge residue
195 io_class = 'SequenceSecondaryStructureMarkup'
198 chain = self.sequence.pdb_chain
199 model = chain.get_parent()
200 pdb_file = NamedTemporaryFile(delete=False)
201 self.sequence.save_pdb(pdb_file)
203 dssp=DSSP(model, pdb_file.name, dssp='dsspcmbi')
204 for monomer in self.sequence:
205 monomer.ss = dssp[(chain.get_id(), monomer.pdb_residue.get_id())][1]
206 os.unlink(pdb_file.name)
208 # This MUST be the last statement in this module.
211 # vim: set ts=4 sts=4 sw=4 et: