allpy
diff allpy/fileio.py @ 711:21cfc7897a8f
Implemented markup fileIO (closes #56)
This is done by adding the file format 'markup' or 'markup:formatname', where
'formatname' is any other known alignment format.
The file format is described briefly in the fileio.MarkupFile docstring.
This commit also contains an example of defining a markup-saving mixin,
markups.IntMarkupMixin, and a test for it.
author | Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru> |
---|---|
date | Thu, 07 Jul 2011 22:32:21 +0400 |
parents | 80043822a41e |
children | d16e8559b6dd |
line diff
1.1 --- a/allpy/fileio.py Thu Jul 07 22:27:14 2011 +0400 1.2 +++ b/allpy/fileio.py Thu Jul 07 22:32:21 2011 +0400 1.3 @@ -3,11 +3,21 @@ 1.4 from tempfile import NamedTemporaryFile 1.5 import util 1.6 1.7 +def get_markups_class(classname): 1.8 + """This ugly helper is to avoid bad untimely import loops.""" 1.9 + import markups 1.10 + return getattr(markups, classname) 1.11 + 1.12 class File(object): 1.13 """Automatical file IO.""" 1.14 def __new__(cls, file, format="fasta", **kw): 1.15 if format == "fasta": 1.16 return FastaFile(file, **kw) 1.17 + elif format == 'markup': 1.18 + return MarkupFile(file, **kw) 1.19 + elif format.startswith('markup:'): 1.20 + subformat = format.split(':',1)[1] 1.21 + return MarkupFile(file, format=subformat, **kw) 1.22 else: 1.23 return EmbossFile(file, format, **kw) 1.24 1.25 @@ -67,6 +77,138 @@ 1.26 body = util.remove_each(body, " \n\r\t\v") 1.27 yield (name, description, body) 1.28 1.29 +class MarkupFile(AlignmentFile): 1.30 + """Parser & writer for our own marked alignment file format. 1.31 + 1.32 + Marked alignment file consists of a list of records, separated with one or 1.33 + more empty lines. Each record consists of type name, header and optional 1.34 + contents. Type name is a line, containing just one word, describing the 1.35 + record type. Header is a sequence of lines, each in format `key: value`. 1.36 + Content, if present, is separated from header with an empty line. 1.37 + 1.38 + Type names and header key names are case-insensitive. 
1.39 + 1.40 + Known record types now are: 1.41 + 1.42 + - `alignment` -- this must be the last record in file for now 1.43 + - `sequence_markup` 1.44 + - `alignment_markup` 1.45 + 1.46 + Example:: 1.47 + 1.48 + sequence_markup 1.49 + sequence_name: cyb5_mouse 1.50 + sequence_description: 1.51 + name: pdb_residue_number 1.52 + type: SequencePDBResidueNumberMarkup 1.53 + markup: -,12,121,122,123,124,13,14,15,-,-,16 1.54 + 1.55 + alignment_markup 1.56 + name: geometrical_core 1.57 + type: AlignmentGeometricalCoreMarkup 1.58 + markup: -,-,-,-,+,+,+,-,-,-,+,+,-,-,-,- 1.59 + 1.60 + alignment 1.61 + format: fasta 1.62 + 1.63 + > cyb5_mouse 1.64 + seqvencemouse 1.65 + """ 1.66 + 1.67 + _empty_line = '' 1.68 + """Helper attribute for write_empty_line.""" 1.69 + 1.70 + def write_alignment(self, alignment): 1.71 + """Write alignment to file.""" 1.72 + self.write_markups(alignment.markups, 'alignment_markup') 1.73 + for sequence in alignment.sequences: 1.74 + record = { 1.75 + 'sequence_name': sequence.name, 1.76 + 'sequence_description': sequence.description, 1.77 + } 1.78 + self.write_markups(sequence.markups, 'sequence_markup', record) 1.79 + record = {'type': 'alignment', 'format': self.format} 1.80 + self.write_record(record) 1.81 + self.write_empty_line() 1.82 + alignment.to_file(self.file) 1.83 + 1.84 + def write_markups(self, markups, type, pre_record={}): 1.85 + """Write a dictionary of markups as series of records.""" 1.86 + for name, markup in markups.items(): 1.87 + record = markup.to_record() 1.88 + record.update(pre_record) 1.89 + record['type'] = type 1.90 + record['name'] = name 1.91 + record['class'] = markup.__class__.__name__ 1.92 + self.write_record(record) 1.93 + 1.94 + def write_record(self, record): 1.95 + """Write record to file. 
Add new line before every but first record.""" 1.96 + self.write_empty_line() 1.97 + self.file.write('%s\n' % record['type']) 1.98 + del record['type'] 1.99 + for key, value in record.items(): 1.100 + self.file.write('%s: %s\n' % (key, value)) 1.101 + 1.102 + def write_empty_line(self): 1.103 + """Add empty line every time except the first call.""" 1.104 + self.file.write(self._empty_line) 1.105 + self._empty_line = '\n' 1.106 + 1.107 + def read_alignment(self, alignment): 1.108 + """Read alignment from file.""" 1.109 + for record in list(self.read_records(alignment)): 1.110 + handler = getattr(self, 'add_%s' % record['type']) 1.111 + handler(alignment, record) 1.112 + 1.113 + def add_sequence_markup(self, alignment, record): 1.114 + """Found sequence markup record in file. Do something about it.""" 1.115 + for sequence in alignment.sequences: 1.116 + if sequence.name == record['sequence_name']: 1.117 + description = record.get('sequence_description') 1.118 + if description: 1.119 + assert sequence.description == description 1.120 + cls = get_markups_class(record['class']) 1.121 + cls.from_record(sequence, record, name=record.get('name')) 1.122 + return 1.123 + raise AssertionError("Could not find sequence in alignment") 1.124 + 1.125 + def add_alignment_markup(self, alignment, record): 1.126 + """Found alignment markup record in file. Do something about it.""" 1.127 + cls = get_markups_class(record['class']) 1.128 + cls.from_record(alignment, record, name=record.get('name')) 1.129 + 1.130 + def add_alignment(self, alignment, record): 1.131 + """Found alignment record. 
It has been handled in read_payload.""" 1.132 + pass 1.133 + 1.134 + def read_records(self, alignment): 1.135 + """Read records and return them as a list of dicts.""" 1.136 + for line in self.file: 1.137 + if line.strip() == "": 1.138 + continue 1.139 + yield self.read_record(alignment, line) 1.140 + 1.141 + def read_record(self, alignment, type): 1.142 + """Read record headers and record payload.""" 1.143 + type = type.strip().lower() 1.144 + record = {'type': type} 1.145 + for line in self.file: 1.146 + if line.strip() == "": 1.147 + self.read_payload(alignment, record, type) 1.148 + return record 1.149 + key, value = line.split(':', 1) 1.150 + key = key.strip().lower() 1.151 + value = value.strip() 1.152 + record[key] = value 1.153 + return record 1.154 + 1.155 + def read_payload(self, alignment, record, type): 1.156 + """Read record payload, if necessary.""" 1.157 + if type == 'alignment': 1.158 + io = File(self.file, record.get('format', 'fasta')) 1.159 + io.read_alignment(alignment) 1.160 + 1.161 class EmbossFile(AlignmentFile): 1.162 """Parser & writer for file formats supported by EMBOSS.""" 1.163