Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/diff/fd531580b9af/allpy/fileio.py
Дата изменения: Unknown
Дата индексирования: Thu Feb 28 01:42:38 2013
Кодировка:
allpy: allpy/fileio.py diff

allpy

diff allpy/fileio.py @ 724:fd531580b9af

Merge between Burkov and others.
author Boris Burkov <BurkovBA@gmail.com>
date Fri, 08 Jul 2011 16:20:20 +0400
parents 80043822a41e
children d16e8559b6dd
line diff
     1.1 --- a/allpy/fileio.py	Fri Jul 08 16:06:59 2011 +0400
     1.2 +++ b/allpy/fileio.py	Fri Jul 08 16:20:20 2011 +0400
     1.3 @@ -3,21 +3,49 @@
     1.4  from tempfile import NamedTemporaryFile
     1.5  import util
     1.6  
     1.7 +def get_markups_class(classname):
     1.8 +    """This ugly helper is to avoid bad untimely import loops."""
     1.9 +    import markups
    1.10 +    return getattr(markups, classname)
    1.11 +
    1.12  class File(object):
    1.13      """Automatic file I/O."""
    1.14 -    def __new__(cls, file, format="fasta"):
    1.15 +    def __new__(cls, file, format="fasta", **kw):
    1.16          if format == "fasta":
    1.17 -            return FastaFile(file)
    1.18 +            return FastaFile(file, **kw)
    1.19 +        elif format == 'markup':
    1.20 +            return MarkupFile(file, **kw)
    1.21 +        elif format.startswith('markup:'):
    1.22 +            subformat = format.split(':',1)[1]
    1.23 +            return MarkupFile(file, format=subformat, **kw)
    1.24          else:
    1.25 -            return EmbossFile(file, format)
    1.26 +            return EmbossFile(file, format, **kw)
    1.27  
    1.28 -class FastaFile(object):
    1.29 +class AlignmentFile(object):
    1.30 +    """Base class with helpers shared by the alignment file readers/writers."""
    1.31 +
    1.32 +    def __init__(self, file, format='fasta', gaps='-', wrap_column=70):
    1.33 +        self.file = file
    1.34 +        self.format = format
    1.35 +        self.gaps = gaps
    1.36 +        self.wrap_column = wrap_column
    1.37 +
    1.38 +    def write_alignment(self, alignment):
    1.39 +        """Append alignment to the file."""
    1.40 +        self.write_strings(
    1.41 +            (row, row.sequence.name, row.sequence.description)
    1.42 +            for row in alignment.rows_as_strings(self.gaps)
    1.43 +        )
    1.44 +
    1.45 +    def read_alignment(self, alignment):
    1.46 +        """Read alignment from the file."""
    1.47 +        append_row = alignment.append_row_from_string
    1.48 +        for name, description, body in self.read_strings():
    1.49 +            append_row(body, name, description, file.name, self.gaps)
    1.50 +
    1.51 +class FastaFile(AlignmentFile):
    1.52      """Fasta parser & writer."""
    1.53  
    1.54 -    def __init__(self, file, wrap_column=70):
    1.55 -        self.file = file
    1.56 -        self.wrap_column = wrap_column
    1.57 -
    1.58      def write_string(self, string, name, description=''):
    1.59          """Append one sequence to file."""
    1.60          if description:
    1.61 @@ -49,13 +77,141 @@
    1.62              body = util.remove_each(body, " \n\r\t\v")
    1.63              yield (name, description, body)
    1.64  
    1.65 -class EmbossFile(object):
    1.66 +class MarkupFile(AlignmentFile):
    1.67 +    """Parser & writer for our own marked alignment file format.
    1.68 +
    1.69 +    Marked alignment file consists of a list of records, separated with one or
    1.70 +    more empty lines. Each record consists of type name, header and optional
    1.71 +    contents. Type name is a line, containing just one word, describing the
    1.72 +    record type. Header is a sequence of lines, each in format `key: value`.
    1.73 +    Content, if present, is separated from header with an empty line.
    1.74 +
    1.75 +    Type names and header key names are case-insensitive.
    1.76 +
    1.77 +    Known record types now are:
    1.78 +
    1.79 +    - `alignment` -- this must be the last record in file for now
    1.80 +    - `sequence_markup`
    1.81 +    - `alignment_markup`
    1.82 +
    1.83 +    Example::
    1.84 +
    1.85 +        sequence_markup
    1.86 +        sequence_name: cyb5_mouse
    1.87 +        sequence_description:
    1.88 +        name: pdb_residue_number
    1.89 +        class: SequencePDBResidueNumberMarkup
    1.90 +        markup: -,12,121,122,123,124,13,14,15,-,-,16
    1.91 +
    1.92 +        alignment_markup
    1.93 +        name: geometrical_core
    1.94 +        class: AlignmentGeometricalCoreMarkup
    1.95 +        markup: -,-,-,-,+,+,+,-,-,-,+,+,-,-,-,-
    1.96 +
    1.97 +        alignment
    1.98 +        format: fasta
    1.99 +
   1.100 +        > cyb5_mouse
   1.101 +        seqvencemouse
   1.102 +    """
   1.103 +
   1.104 +    _empty_line = ''
   1.105 +    """Helper attribute for write_empty_line."""
   1.106 +
   1.107 +    def write_alignment(self, alignment):
   1.108 +        """Write alignment to file."""
   1.109 +        self.write_markups(alignment.markups, 'alignment_markup')
   1.110 +        for sequence in alignment.sequences:
   1.111 +            record = {
   1.112 +                'sequence_name': sequence.name,
   1.113 +                'sequence_description': sequence.description,
   1.114 +            }
   1.115 +            self.write_markups(sequence.markups, 'sequence_markup', record)
   1.116 +        record = {'type': 'alignment', 'format': self.format}
   1.117 +        self.write_record(record)
   1.118 +        self.write_empty_line()
   1.119 +        alignment.to_file(self.file)
   1.120 +
   1.121 +    def write_markups(self, markups, type, pre_record={}):
   1.122 +        """Write a dictionary of markups as series of records."""
   1.123 +        for name, markup in markups.items():
   1.124 +            record = markup.to_record()
   1.125 +            record.update(pre_record)
   1.126 +            record['type'] = type
   1.127 +            record['name'] = name
   1.128 +            record['class'] = markup.__class__.__name__
   1.129 +            self.write_record(record)
   1.130 +
   1.131 +    def write_record(self, record):
   1.132 +        """Write record to file. Add an empty line before every record but the first."""
   1.133 +        self.write_empty_line()
   1.134 +        self.file.write('%s\n' % record['type'])
   1.135 +        del record['type']
   1.136 +        for key, value in record.items():
   1.137 +            self.file.write('%s: %s\n' % (key, value))
   1.138 +
   1.139 +    def write_empty_line(self):
   1.140 +        """Add empty line every time except the first call."""
   1.141 +        self.file.write(self._empty_line)
   1.142 +        self._empty_line = '\n'
   1.143 +
   1.144 +    def read_alignment(self, alignment):
   1.145 +        """Read alignment from file."""
   1.146 +        for record in list(self.read_records(alignment)):
   1.147 +            handler = getattr(self, 'add_%s' % record['type'])
   1.148 +            handler(alignment, record)
   1.149 +
   1.150 +    def add_sequence_markup(self, alignment, record):
   1.151 +        """Found sequence markup record in file. Do something about it."""
   1.152 +        for sequence in alignment.sequences:
   1.153 +            if sequence.name == record['sequence_name']:
   1.154 +                description = record.get('sequence_description')
   1.155 +                if description:
   1.156 +                    assert sequence.description == description
   1.157 +                cls = get_markups_class(record['class'])
   1.158 +                cls.from_record(sequence, record, name=record.get('name'))
   1.159 +                return
   1.160 +        raise AssertionError("Could not find sequence in alignment")
   1.161 +
   1.162 +    def add_alignment_markup(self, alignment, record):
   1.163 +        """Found alignment markup record in file. Do something about it."""
   1.164 +        cls = get_markups_class(record['class'])
   1.165 +        cls.from_record(alignment, record, name=record.get('name'))
   1.166 +
   1.167 +    def add_alignment(self, alignment, record):
   1.168 +        """Found alignment record. It has been handled in read_payload."""
   1.169 +        pass
   1.170 +
   1.171 +    def read_records(self, alignment):
   1.172 +        """Read records and yield them one by one as dicts."""
   1.173 +        for line in self.file:
   1.174 +            if line.strip() == "":
   1.175 +                continue
   1.176 +            yield self.read_record(alignment, line)
   1.177 +
   1.178 +    def read_record(self, alignment, type):
   1.179 +        """Read record headers and record payload."""
   1.180 +        type = type.strip().lower()
   1.181 +        record = {'type': type}
   1.182 +        for line in self.file:
   1.183 +            if line.strip() == "":
   1.184 +                self.read_payload(alignment, record, type)
   1.185 +                return record
   1.186 +            key, value = line.split(':', 1)
   1.187 +            key = key.strip().lower()
   1.188 +            value = value.strip()
   1.189 +            record[key] = value
   1.190 +        return record
   1.191 +
   1.192 +    def read_payload(self, alignment, record, type):
   1.193 +        """Read record payload, if necessary."""
   1.194 +        if type == 'alignment':
   1.195 +            io = File(self.file, record.get('format', 'fasta'))
   1.196 +            io.read_alignment(alignment)
   1.197 +
   1.198 +class EmbossFile(AlignmentFile):
   1.199      """Parser & writer for file formats supported by EMBOSS."""
   1.200  
   1.201 -    def __init__(self, file, format):
   1.202 -        self.file = file
   1.203 -        self.format = format
   1.204 -
   1.205      def write_strings(self, sequences):
   1.206          """Write sequences to file."""
   1.207          pipe = Popen(['seqret', 'stdin', '%s::stdout' % self.format],