Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/a4d7438c142f
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 00:28:53 2012
Кодировка:
allpy: a4d7438c142f

allpy

changeset 418:a4d7438c142f

add fileio module, remove fasta module, add msf support (see #31)
author boris (netbook) <bnagaev@gmail.com>
date Fri, 11 Feb 2011 15:04:17 +0300
parents 8d678611cd1c
children e4c4151b9dc3
files allpy/base.py allpy/fasta.py allpy/fileio.py
diffstat 3 files changed, 115 insertions(+), 49 deletions(-) [+]
line diff
     1.1 --- a/allpy/base.py	Fri Feb 11 14:13:42 2011 +0300
     1.2 +++ b/allpy/base.py	Fri Feb 11 15:04:17 2011 +0300
     1.3 @@ -2,7 +2,7 @@
     1.4  import re
     1.5  
     1.6  import util
     1.7 -import fasta
     1.8 +import fileio
     1.9  
    1.10  # import this very module as means of having all related classes in one place
    1.11  import base
    1.12 @@ -196,22 +196,34 @@
    1.13          If sequences in file have gaps (detected as characters belonging to
    1.14          `gaps` set), treat them accordingly.
    1.15          """
    1.16 -        assert format == 'fasta', "We don't support other formats yet"
    1.17 -        for (name, description, body) in fasta.parse_file(file):
    1.18 +        sequences = []
    1.19 +        if format == 'fasta':
    1.20 +            sequences = fileio.FastaIo(file).get_all_strings()
    1.21 +        elif format == 'msf':
    1.22 +            sequences = fileio.MsfIo(file).get_all_strings()
    1.23 +        else:
    1.24 +            raise Exception("We don't support other formats yet")
    1.25 +        for (name, description, body) in sequences:
    1.26              self.append_row_from_string(body, name, description, file.name, gaps)
    1.27          return self
    1.28  
    1.29 -    def to_file(self, file, format='fasta'):
    1.30 +    def to_file(self, file, format='fasta', gap='-'):
    1.31          """Write alignment in FASTA file as sequences with gaps."""
    1.32          assert format == "fasta", "We don't support other formats yet"
    1.33          def char(monomer):
    1.34              if monomer:
    1.35                  return monomer.code1
    1.36 -            return "-"
    1.37 +            return gap
    1.38 +        if format == 'fasta':
    1.39 +            io = fileio.FastaIo(file)
    1.40 +        elif format == 'msf':
    1.41 +            io = fileio.MsfIo(file)
    1.42 +        else:
    1.43 +            raise Exception("We don't support other formats yet")
    1.44          for row in self.rows_as_lists():
    1.45              seq = row.sequence
    1.46              line = "".join(map(char, row))
    1.47 -            fasta.save_file(file, line, seq.name, seq.description)
    1.48 +            io.save_string(line, seq.name, seq.description)
    1.49  
    1.50      # Data access methods for alignment
    1.51      # =================================
     2.1 --- a/allpy/fasta.py	Fri Feb 11 14:13:42 2011 +0300
     2.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.3 @@ -1,43 +0,0 @@
     2.4 -import util
     2.5 -
     2.6 -def parse_file(file):
     2.7 -    """Parse fasta file, remove spaces and newlines from sequence bodies.
     2.8 -
     2.9 -    Return a list of tuples (name, description, sequence_body).
    2.10 -    """
    2.11 -    sequences = []
    2.12 -    for part in file.read().split("\n>"):
    2.13 -        header, _, body = part.partition("\n")
    2.14 -        header = header.lstrip(">").strip()
    2.15 -        name, _, description = header.partition(" ")
    2.16 -        name = name.strip()
    2.17 -        description = description.strip()
    2.18 -        body = util.remove_each(body, " \n\r\t\v")
    2.19 -        sequences.append((name, description, body))
    2.20 -    return sequences
    2.21 -
    2.22 -def save_file(out_file, string, name, description='', long_line=70):
    2.23 -    """ Saves given string to out_file in fasta_format
    2.24 -
    2.25 -    Splits long lines to substrings of length=long_line
    2.26 -    To prevent this, set long_line=None
    2.27 -    """
    2.28 -    if description:
    2.29 -        name += " " + description
    2.30 -    out_file.write(">%s\n" % name)
    2.31 -    if long_line:
    2.32 -        for i in range(0, len(string) // long_line + 1):
    2.33 -            out_file.write("%s\n" % string[i*long_line : i*long_line + long_line])
    2.34 -    else:
    2.35 -        out_file.write("%s\n" % string)
    2.36 -
    2.37 -def determine_long_line(in_file):
    2.38 -    """ Returns maximum sequence line length in fasta file """
    2.39 -    sequences = in_file.read().split('>')
    2.40 -    for sequence in sequences[1:]:
    2.41 -        lines = sequence.split('\n')[1:]
    2.42 -        if len(lines) >= 2:
    2.43 -            return len(lines[0].strip())
    2.44 -    return 70
    2.45 -
    2.46 -# vim: set ts=4 sts=4 sw=4 et:
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/allpy/fileio.py	Fri Feb 11 15:04:17 2011 +0300
     3.3 @@ -0,0 +1,97 @@
     3.4 +import os
     3.5 +from tempfile import NamedTemporaryFile
     3.6 +
     3.7 +import util
     3.8 +
     3.9 +class BaseIo(object):
    3.10 +    """ Base class providing alignment/sequence import and export
    3.11 +
    3.12 +    Data:
    3.13 +        * file - file object
    3.14 +    """
    3.15 +
    3.16 +    def __init__(self, file):
    3.17 +        self.file = file
    3.18 +
    3.19 +    def save_string(self, string, name, description=''):
    3.20 +        """ Saves given string to file
    3.21 +
    3.22 +        Splits long lines to substrings of length=long_line
    3.23 +        To prevent this, set long_line=None
    3.24 +        """
    3.25 +        pass
    3.26 +
    3.27 +    def get_all_strings(self):
    3.28 +        """Parse fasta file, remove spaces and newlines from sequence bodies.
    3.29 +
    3.30 +        Return a list of tuples (name, description, sequence_body).
    3.31 +        """
    3.32 +        pass
    3.33 +
    3.34 +    def get_string(self, name):
    3.35 +        """ return tuple (name, description, string) for sequence with name name """
    3.36 +        for name_test, description, body in self.get_all_strings():
    3.37 +            if name_test == name:
    3.38 +                return (name_test, description, body)
    3.39 +
    3.40 +class FastaIo(BaseIo):
    3.41 +    """ Fasta import and export
    3.42 +
    3.43 +    Additional data:
    3.44 +    * long_line - max length of file line while export
    3.45 +        Splits long lines to substrings of length=long_line
    3.46 +        To prevent this, set long_line=None
    3.47 +    """
    3.48 +
    3.49 +    def __init__(self, file, long_line=70):
    3.50 +        BaseIo.__init__(self, file)
    3.51 +        self.long_line = long_line
    3.52 +
    3.53 +    def save_string(self, string, name, description=''):
    3.54 +        if description:
    3.55 +            name += " " + description
    3.56 +        self.file.write(">%s\n" % name)
    3.57 +        if self.long_line:
    3.58 +            for i in range(0, len(string) // self.long_line + 1):
    3.59 +                start = i*self.long_line
    3.60 +                end = i*self.long_line + self.long_line
    3.61 +                self.file.write("%s\n" % string[start:end])
    3.62 +        else:
    3.63 +            self.file.write("%s\n" % string)
    3.64 +
    3.65 +    def get_all_strings(self):
    3.66 +        for part in self.file.read().split("\n>"):
    3.67 +            header, _, body = part.partition("\n")
    3.68 +            header = header.lstrip(">").strip()
    3.69 +            name, _, description = header.partition(" ")
    3.70 +            name = name.strip()
    3.71 +            description = description.strip()
    3.72 +            body = util.remove_each(body, " \n\r\t\v")
    3.73 +            yield (name, description, body)
    3.74 +
    3.75 +    def get_string(self, name):
    3.76 +        for name_test, description, body in self.get_all_strings():
    3.77 +            if name_test == name:
    3.78 +                return (name_test, description, body)
    3.79 +
    3.80 +class MsfIo(BaseIo):
    3.81 +    """ Msf import and export """
    3.82 +
    3.83 +    def __init__(self, file):
    3.84 +        BaseIo.__init__(self, file)
    3.85 +        self.tmp_fasta = NamedTemporaryFile(delete=False)
    3.86 +        self.tmp_fasta.close()
    3.87 +        os.system("seqret %(msf)s %(fasta)s" % \
    3.88 +            {'msf': self.file, 'fasta': self.tmp_fasta.name})
    3.89 +
    3.90 +    def save_string(self, string, name, description=''):
    3.91 +        self.tmp_fasta = open(tmp_fasta.name, 'a')
    3.92 +        fasta = FastaIo(self.tmp_fasta)
    3.93 +        fasta.save_string(string, name, description)
    3.94 +        self.tmp_fasta.close()
    3.95 +
    3.96 +    def get_all_strings(self):
    3.97 +        self.tmp_fasta = open(tmp_fasta.name)
    3.98 +        fasta = FastaIo(self.tmp_fasta)
    3.99 +        return fasta.get_all_strings()
   3.100 +