Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/7ebba94eece2/allpy/markups.py
Дата изменения: Unknown
Дата индексирования: Mon Feb 4 05:14:03 2013
Кодировка:
allpy: 7ebba94eece2 allpy/markups.py

allpy

view allpy/markups.py @ 900:7ebba94eece2

MarkupIOMixin: added attribute quotes to allow quotation around markup serialized string; fixed implementation of SequenceCaseMarkup based on that; [closes #125]
author Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date Tue, 11 Oct 2011 17:21:44 +0400
parents 6134ae1dfdae
children b8ddd35fac65
line source
1 import os
2 from tempfile import NamedTemporaryFile
4 from Bio.PDB import DSSP
6 import base
by_name = {}
"""A dictionary of (markup kind, markup name) -> default markup class."""


def update(*args):
    """Update `by_name` dictionary.

    If any arguments are given, add them to markups namespace beforehand.

    Every value in the module globals that has both a `name` and a `kind`
    attribute is registered in `by_name` under the key `(kind, name)`.

    The update is atomic: everything is validated first, and the module
    globals and `by_name` are only modified when no conflict was found.

    Raises AssertionError if a class in `args` has the same `__name__` as an
    existing module global (or as another class in `args`), or if two
    registered objects share the same `(kind, name)` pair.
    """
    global by_name
    namespace = globals()

    # Collect user classes without touching the module namespace yet.
    # Explicit raises (instead of `assert`) keep the checks alive under -O.
    additions = {}
    for markup_class in args:
        class_name = markup_class.__name__
        if class_name in namespace or class_name in additions:
            raise AssertionError("SameNamed markup already exists!")
        additions[class_name] = markup_class

    # Build the new registry from the current globals plus the additions.
    registry = {}
    candidates = list(namespace.values()) + list(additions.values())
    for candidate in candidates:
        if hasattr(candidate, 'name') and hasattr(candidate, 'kind'):
            fullname = candidate.kind, candidate.name
            if fullname in registry:
                raise AssertionError("Samenamed markup already exists!")
            registry[fullname] = candidate

    # Nothing conflicted: publish the classes and the rebuilt registry.
    namespace.update(additions)
    by_name = registry
class MarkupIOMixin(base.Markup):
    """Standard helper mixin for creating saveable markups.

    Subclasses set `io_class` and may overload `parse_item` / `repr_item`,
    `separator` and `quotes` to control how items are (de)serialized.
    """

    separator = ','
    """Separator to use when saving/loading markup.

    An empty separator means the markup string is read item by item, one
    character per key.
    """

    quotes = ''
    """Quotation sign stripped from both ends of the markup when loading."""

    io_class = None
    """MUST be overloaded when subclassing. io_class in file."""

    @staticmethod
    def parse_item(key, value):
        """Deserialize one item of markup. Overload when subclassing."""
        return value

    @staticmethod
    def repr_item(key, value):
        """Serialize one item of markup. Overload when subclassing."""
        return str(value)

    @classmethod
    def from_record(cls, container, record, name=None):
        """Read markup from semi-parsed record from 'markup' file.

        `record` is a dictionary with mandatory keys 'io_class' and 'markup'
        and optional keys 'separator' and 'quotes' (class defaults are used
        when they are absent). The markup is created with
        `container.add_markup(name, markup_class=cls)` and returned.
        Items whose serialized value is empty are left unset.

        Raises AssertionError if the record belongs to another io_class or
        if the number of serialized items differs from the number of keys.
        """
        # Explicit raises (instead of `assert`) keep validation of file data
        # alive when Python runs with -O.
        if record['io_class'] != cls.io_class:
            raise AssertionError(
                "Record io_class %r does not match %r"
                % (record['io_class'], cls.io_class))
        separator = record.get('separator', cls.separator)
        quotes = record.get('quotes', cls.quotes)
        values = record['markup'].strip(quotes)
        if separator:
            values = values.split(separator)
        result = container.add_markup(name, markup_class=cls)
        keys = result.sorted_keys()
        # `to_record` serializes a markup without keys as '', which splits
        # into [''] -- one bogus empty item. Treat it as "no items" so that
        # an empty markup survives the save/load round trip.
        if not keys and values == ['']:
            values = []
        if len(values) != len(keys):
            raise AssertionError(
                "Markup has %d items, but %d were expected"
                % (len(values), len(keys)))
        for key, value in zip(keys, values):
            if value:
                result[key] = cls.parse_item(key, value)
        return result

    def to_record(self):
        """Write markup to semi-serialized record for 'markup' file.

        Returns a dictionary with keys 'markup', 'io_class', 'separator' and
        'quotes'. Keys that have no value in the markup are serialized as
        empty strings. The markup string itself is not wrapped in `quotes`
        here; the quotation sign is only reported in the record.
        """
        values = [
            self.repr_item(key, self[key]) if key in self else ''
            for key in self.sorted_keys()
        ]
        return {
            'markup': self.separator.join(values),
            'io_class': self.io_class,
            'separator': self.separator,
            'quotes': self.quotes,
        }
class IntMarkupMixin(MarkupIOMixin):
    """Saveable markup whose items are integers."""

    io_class = 'IntMarkup'

    @staticmethod
    def parse_item(key, value):
        """Convert the serialized `value` to int; `key` is not used."""
        number = int(value)
        return number
class SequenceNumberMarkup(base.SequenceMarkup):
    """Markup holding the 1-based position of every monomer in a sequence."""

    name = 'number'

    def refresh(self):
        """Store the 1-based position of each monomer in `monomer.number`."""
        for offset, monomer in enumerate(self.sequence):
            monomer.number = offset + 1
class SequenceIndexMarkup(base.SequenceMarkup):
    """Markup holding the 0-based position of every monomer in a sequence."""

    name = 'index'

    def refresh(self):
        """Store the 0-based position of each monomer in `monomer.index`."""
        position = 0
        for monomer in self.sequence:
            monomer.index = position
            position += 1
class AlignmentNumberMarkup(base.AlignmentMarkup):
    """Markup mapping every alignment column to its 1-based position."""

    name = 'number'

    def refresh(self):
        """Assign to each column of the alignment its 1-based position."""
        columns = self.alignment.columns
        for offset, column in enumerate(columns):
            self[column] = offset + 1
class AlignmentIndexMarkup(base.AlignmentMarkup):
    """Markup mapping every alignment column to its 0-based position."""

    name = 'index'

    def refresh(self):
        """Assign to each column of the alignment its 0-based position."""
        position = 0
        for column in self.alignment.columns:
            self[column] = position
            position += 1
class SequenceCaseMarkup(base.SequenceMarkup, MarkupIOMixin):
    """Saveable markup remembering the letter case of every monomer.

    Values are the strings 'upper' and 'lower'. The markup is serialized
    without a separator, as the monomer letters written in the matching
    case, and the serialized string is quoted with "'".
    """

    name = 'case'
    io_class = 'SequenceCaseMarkup'
    separator = ''
    quotes = "'"

    def refresh(self):
        """Set `monomer.case` from the case of `monomer.input_code1`.

        Monomers whose input code is neither upper nor lower case are left
        untouched.
        """
        for monomer in self.sequence:
            letter = monomer.input_code1
            if letter.isupper():
                monomer.case = 'upper'
            elif letter.islower():
                monomer.case = 'lower'

    @staticmethod
    def parse_item(monomer, value):
        """Return 'upper' or 'lower' for the serialized letter `value`.

        Returns None when the letter has no case. The letter is asserted to
        match `monomer.code1` (compared in upper case).
        """
        assert monomer.code1 == value.upper()
        if value.isupper():
            case = 'upper'
        elif value.islower():
            case = 'lower'
        else:
            case = None
        return case

    @staticmethod
    def repr_item(monomer, value):
        """Return `monomer.code1` written in the case stored on the monomer.

        Raises AssertionError if `monomer.case` is neither 'upper' nor
        'lower'.
        """
        # NOTE(review): `value` is ignored and `monomer.case` is read
        # instead; presumably both are the same thing -- confirm.
        case = monomer.case
        if case not in ('upper', 'lower'):
            raise AssertionError("Unknown monomer case")
        code = monomer.code1
        if case == 'upper':
            return code.upper()
        return code.lower()
class SequencePdbResiMarkup(base.SequenceMarkup, IntMarkupMixin):
    """Saveable integer markup with the PDB residue number of each monomer."""

    name = 'pdb_resi'

    def from_pdb(self):
        """Fill `monomer.pdb_resi` from `monomer.pdb_residue.id[1]`.

        Best effort: monomers for which this fails for any reason are
        silently skipped.
        """
        for monomer in self.sequence:
            try:
                residue_id = monomer.pdb_residue.id
                monomer.pdb_resi = residue_id[1]
            except Exception:
                continue

    def add_pdb(self, download_pdb=None, xyz_only=False):
        """Attach PDB data to the monomers that have a value in this markup.

        The PDB code and chain are parsed from the sequence name; the file
        is obtained with `download_pdb(code)`, which defaults to
        `structure.cached_download_pdb`.

        Each marked monomer gets `ca_xyz`, the vector of its CA atom. Unless
        `xyz_only` is true, the sequence also gets `pdb_chain` and each
        marked monomer gets `pdb_residue`.
        """
        import structure
        if download_pdb is None:
            download_pdb = structure.cached_download_pdb

        parsed = structure.pdb_id_parse(self.sequence.name)
        # NOTE(review): `model` is looked up but never used; the first model
        # (index 0) is always taken below -- confirm this is intended.
        code, model, chain = parsed['code'], parsed['model'], parsed['chain']
        pdb_file = download_pdb(code)
        pdb_structure = structure.get_structure(pdb_file, self.sequence.name)
        pdb_chain = pdb_structure[0][chain]

        keep_objects = not xyz_only
        if keep_objects:
            self.sequence.pdb_chain = pdb_chain
        for monomer in self.sequence:
            if monomer not in self:
                continue
            pdb_residue = pdb_chain[' ', monomer.pdb_resi, ' ']
            monomer.ca_xyz = pdb_residue['CA'].get_vector()
            if keep_objects:
                monomer.pdb_residue = pdb_residue
class SequenceSecondaryStructureMarkup(base.SequenceMarkup, MarkupIOMixin):
    """ Secondary structure markup for sequence.

    Depends on dsspcmbi program.
    Sequence should be structure.SequenceMixin, pdb should be loaded.
    Note that DSSP cannot handle multiple models!
    Note that dssp executable name is hardcoded (=dsspcmbi).

    Notation:
    * H -- alpha-helix
    * B -- Isolated beta-bridge residue
    * E -- Strand
    * G -- 3-10 helix
    * I -- pi-helix
    * T -- Turn
    * S -- Bend
    * - -- Other

    In addition, `refresh` stores '?' for monomers whose secondary
    structure could not be looked up in the DSSP output.
    """

    name = 'ss'
    io_class = 'SequenceSecondaryStructureMarkup'

    def refresh(self):
        """Run DSSP on the sequence structure and fill `monomer.ss`.

        The sequence is saved into a temporary PDB file, DSSP is run on it
        and the temporary file is removed afterwards, even if saving the
        structure or running DSSP fails.
        """
        chain = self.sequence.pdb_chain
        model = chain.get_parent()
        # delete=False: the file must outlive close() so that the external
        # dssp program can read it by name; it is removed explicitly below.
        pdb_file = NamedTemporaryFile(delete=False)
        try:
            try:
                self.sequence.save_pdb(pdb_file)
            finally:
                pdb_file.close()
            dssp = DSSP(model, pdb_file.name, dssp='dsspcmbi')
            for monomer in self.sequence:
                try:
                    dssp_key = (chain.get_id(), monomer.pdb_residue.get_id())
                    monomer.ss = dssp[dssp_key][1]
                except Exception:
                    # Best effort: monomers without a residue or without a
                    # DSSP entry get a placeholder. `Exception` (not a bare
                    # except) lets KeyboardInterrupt/SystemExit propagate.
                    monomer.ss = '?' # FIXME
        finally:
            os.unlink(pdb_file.name)
# This MUST be the last statement in this module: update() scans the module
# globals, so every markup class has to be defined before it is called.
update()

# vim: set ts=4 sts=4 sw=4 et: