Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/737b52785e5e
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 00:51:40 2012
Кодировка:
allpy: 737b52785e5e

allpy

changeset 574:737b52785e5e

Clean rewrite of fileio
author Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date Thu, 24 Mar 2011 20:46:06 +0300
parents 9054c1aae06c
children e131a2f3550e
files allpy/base.py allpy/fileio.py geometrical_core/geometrical-core
diffstat 3 files changed, 63 insertions(+), 98 deletions(-) [+]
line diff
     1.1 --- a/allpy/base.py	Thu Mar 24 20:45:32 2011 +0300
     1.2 +++ b/allpy/base.py	Thu Mar 24 20:46:06 2011 +0300
     1.3 @@ -201,13 +201,8 @@
     1.4          `gaps` set), treat them accordingly.
     1.5          """
     1.6          sequences = []
     1.7 -        if format == 'fasta':
     1.8 -            sequences = fileio.FastaIo(file).get_all_strings()
     1.9 -        elif format == 'msf':
    1.10 -            sequences = fileio.MsfIo(file).get_all_strings()
    1.11 -        else:
    1.12 -            raise Exception("We don't support other formats yet")
    1.13 -        for (name, description, body) in sequences:
    1.14 +        io = fileio.File(file, format)
    1.15 +        for name, description, body in io.read_strings():
    1.16              self.append_row_from_string(body, name, description, file.name, gaps)
    1.17          return self
    1.18  
    1.19 @@ -217,16 +212,11 @@
    1.20              if monomer:
    1.21                  return monomer.code1
    1.22              return gap
    1.23 -        if format == 'fasta':
    1.24 -            io = fileio.FastaIo(file)
    1.25 -        elif format == 'msf':
    1.26 -            io = fileio.MsfIo(file)
    1.27 -        else:
    1.28 -            raise Exception("We don't support other formats yet")
    1.29 +        io = fileio.File(file, format)
    1.30          for row in self.rows_as_lists():
    1.31              seq = row.sequence
    1.32              line = "".join(map(char, row))
    1.33 -            io.save_string(line, seq.name, seq.description)
    1.34 +            io.write_string(line, seq.name, seq.description)
    1.35  
    1.36      # Data access methods for alignment
    1.37      # =================================
     2.1 --- a/allpy/fileio.py	Thu Mar 24 20:45:32 2011 +0300
     2.2 +++ b/allpy/fileio.py	Thu Mar 24 20:46:06 2011 +0300
     2.3 @@ -1,107 +1,82 @@
     2.4  import os
     2.5  from tempfile import NamedTemporaryFile
     2.6 -
     2.7  import util
     2.8  
     2.9 -class BaseIo(object):
    2.10 -    """ Base class providing alignment/sequence import and export
    2.11 +class File(object):
    2.12 +    """Automatical file IO."""
    2.13 +    def __new__(cls, file, format="fasta"):
    2.14 +        if format == "fasta":
    2.15 +            return FastaFile(file)
    2.16 +        else:
    2.17 +            return EmbossFile(file, format)
    2.18  
    2.19 -    Data:
    2.20 -        * file - file object
    2.21 -    """
    2.22 +class FastaFile(object):
    2.23 +    """Fasta parser & writer."""
    2.24  
    2.25 -    def __init__(self, file):
    2.26 +    def __init__(self, file, wrap_column=70):
    2.27          self.file = file
    2.28 +        self.wrap_column = wrap_column
    2.29  
    2.30 -    def save_string(self, string, name, description=''):
    2.31 -        """ Saves given string to file
    2.32 -
    2.33 -        Splits long lines to substrings of length=long_line
    2.34 -        To prevent this, set long_line=None
    2.35 -        """
    2.36 -        pass
    2.37 -
    2.38 -    def get_all_strings(self):
    2.39 -        """Parse fasta file, remove spaces and newlines from sequence bodies.
    2.40 -
    2.41 -        Return a list of tuples (name, description, sequence_body).
    2.42 -        """
    2.43 -        pass
    2.44 -
    2.45 -    def get_string(self, name):
    2.46 -        """ return tuple (name, description, string) for sequence with name name """
    2.47 -        for name_test, description, body in self.get_all_strings():
    2.48 -            if name_test == name:
    2.49 -                return (name_test, description, body)
    2.50 -
    2.51 -class FastaIo(BaseIo):
    2.52 -    """ Fasta import and export
    2.53 -
    2.54 -    Additional data:
    2.55 -    * long_line - max length of file line while export
    2.56 -        Splits long lines to substrings of length=long_line
    2.57 -        To prevent this, set long_line=None
    2.58 -    """
    2.59 -
    2.60 -    def __init__(self, file, long_line=70):
    2.61 -        BaseIo.__init__(self, file)
    2.62 -        self.long_line = long_line
    2.63 -
    2.64 -    def save_string(self, string, name, description=''):
    2.65 +    def write_string(self, string, name, description=''):
    2.66 +        """Append one sequence to file."""
    2.67          if description:
    2.68              name += " " + description
    2.69          self.file.write(">%s\n" % name)
    2.70 -        if self.long_line:
    2.71 -            for i in range(0, len(string) // self.long_line + 1):
    2.72 -                start = i*self.long_line
    2.73 -                end = i*self.long_line + self.long_line
    2.74 -                self.file.write("%s\n" % string[start:end])
    2.75 +        if self.wrap_column:
    2.76 +            while string:
    2.77 +                self.file.write(string[:self.wrap_column]+"\n")
    2.78 +                string = string[self.wrap_column:]
    2.79          else:
    2.80 -            self.file.write("%s\n" % string)
    2.81 +            self.file.write(string+"\n")
    2.82 +        self.file.flush()
    2.83  
    2.84 -    def get_all_strings(self):
    2.85 +    def write_strings(self, sequences):
    2.86 +        """Write sequences to file.
    2.87 +
    2.88 +        Sequences are given as list of tuples (string, name, description).
    2.89 +        """
    2.90 +        for string, name, description in sequences:
    2.91 +            self.write_string(string, name, desription)
    2.92 +
    2.93 +    def read_strings(self):
    2.94          for part in self.file.read().split("\n>"):
    2.95              header, _, body = part.partition("\n")
    2.96 -            header = header.lstrip(">").strip()
    2.97 +            header = header.lstrip(">")
    2.98              name, _, description = header.partition(" ")
    2.99              name = name.strip()
   2.100              description = description.strip()
   2.101              body = util.remove_each(body, " \n\r\t\v")
   2.102              yield (name, description, body)
   2.103  
   2.104 -    def get_string(self, name):
   2.105 -        for name_test, description, body in self.get_all_strings():
   2.106 -            if name_test == name:
   2.107 -                return (name_test, description, body)
   2.108 +class EmbossFile(object):
   2.109 +    """Parser & writer for file formats supported by EMBOSS."""
   2.110  
   2.111 -class MsfIo(BaseIo):
   2.112 -    """ Msf import and export """
   2.113 +    def __init__(self, file, format):
   2.114 +        self.file = file
   2.115 +        self.format = format
   2.116  
   2.117 -    def __init__(self, file):
   2.118 -        BaseIo.__init__(self, file)
   2.119 +    def write_strings(self, sequences):
   2.120 +        """Write sequences to file."""
   2.121 +        # XXX: in case of exceptions files are not closed, nor unlinked
   2.122 +        tmpfile = NamedTemporaryFile('w', delete=False)
   2.123 +        FastaFile(tmpfile).write_strings(self.fix_sequences(sequences))
   2.124 +        tmpfile.close()
   2.125 +        os.system("seqret %s::%s %s" % (self.format, tmpfile, self.file.name))
   2.126 +        os.unlink(tmpfile)
   2.127  
   2.128 -    def save_string(self, string, name, description=''):
   2.129 -        name = name.replace(':', '_') # seqret bug
   2.130 -        tmp_fasta = NamedTemporaryFile('w', delete=False)
   2.131 -        tmp_fasta.close()
   2.132 -        os.system("seqret %(msf)s %(fasta)s" % \
   2.133 -            {'msf': self.file.name, 'fasta': tmp_fasta.name})
   2.134 -        tmp_fasta = open(tmp_fasta.name, 'a')
   2.135 -        fasta = FastaIo(tmp_fasta)
   2.136 -        fasta.save_string(string, name, description)
   2.137 -        tmp_fasta.close()
   2.138 -        self.file.close()
   2.139 -        os.system("seqret %(fasta)s msf::%(msf)s" % \
   2.140 -            {'msf': self.file.name, 'fasta': tmp_fasta.name})
   2.141 -        os.unlink(tmp_fasta.name)
   2.142 -        self.file = open(self.file.name)
   2.143 +    def fix_sequences(self, sequences):
   2.144 +        """EMBOSS does not permit : in file names. Fix sequences for that."""
   2.145 +        for name, description, sequence in sequences:
   2.146 +            yield name.replace(':', '_'), description, sequence
   2.147  
   2.148 -    def get_all_strings(self):
   2.149 -        tmp_fasta = NamedTemporaryFile(delete=False)
   2.150 -        os.system("seqret %(msf)s %(fasta)s" % \
   2.151 -            {'msf': self.file.name, 'fasta': tmp_fasta.name})
   2.152 -        fasta = FastaIo(tmp_fasta)
   2.153 -        strings = list(fasta.get_all_strings())
   2.154 -        os.unlink(tmp_fasta.name)
   2.155 -        return strings
   2.156 +    def read_strings(self):
   2.157 +        """Read sequences from file."""
   2.158 +        # XXX: in case of exceptions files are not closed, nor unlinked
   2.159 +        tmpfile = NamedTemporaryFile(delete=False)
   2.160 +        self.file.flush()
   2.161 +        os.system("seqret %s %s::%s" % (self.file.name, self.format, tmpfile))
   2.162 +        sequences = FastaFile(tmpfile).read_strings()
   2.163 +        os.unlink(tmpfile)
   2.164 +        return sequences
   2.165  
   2.166 +# vim: set et ts=4 sts=4 sw=4:
     3.1 --- a/geometrical_core/geometrical-core	Thu Mar 24 20:45:32 2011 +0300
     3.2 +++ b/geometrical_core/geometrical-core	Thu Mar 24 20:46:06 2011 +0300
     3.3 @@ -104,10 +104,10 @@
     3.4      IOs = []
     3.5      if args.f:
     3.6          block.to_file(args.f, format='fasta')
     3.7 -        IOs.append(fileio.FastaIo(args.f))
     3.8 +        IOs.append(fileio.File(args.f, format='fasta'))
     3.9      if args.g:
    3.10          block.to_file(args.g, format='msf')
    3.11 -        IOs.append(fileio.MsfIo(args.g))
    3.12 +        IOs.append(fileio.File(args.g, format='msf'))
    3.13      for i, GC in enumerate(GCs):
    3.14          for column in GC:
    3.15              m[column] = True
    3.16 @@ -116,7 +116,7 @@
    3.17          description = 'Main geometrical core' if i==0 \
    3.18              else 'Alternative geometrical core %i' % i
    3.19          for io in IOs:
    3.20 -            io.save_string(string, name, description)
    3.21 +            io.write_string(string, name, description)
    3.22          m.clear()
    3.23  
    3.24  if args.p: