allpy
changeset 574:737b52785e5e
Clean rewrite of fileio
author | Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru> |
---|---|
date | Thu, 24 Mar 2011 20:46:06 +0300 |
parents | 9054c1aae06c |
children | e131a2f3550e |
files | allpy/base.py allpy/fileio.py geometrical_core/geometrical-core |
diffstat | 3 files changed, 63 insertions(+), 98 deletions(-) [+] |
line diff
1.1 --- a/allpy/base.py Thu Mar 24 20:45:32 2011 +0300 1.2 +++ b/allpy/base.py Thu Mar 24 20:46:06 2011 +0300 1.3 @@ -201,13 +201,8 @@ 1.4 `gaps` set), treat them accordingly. 1.5 """ 1.6 sequences = [] 1.7 - if format == 'fasta': 1.8 - sequences = fileio.FastaIo(file).get_all_strings() 1.9 - elif format == 'msf': 1.10 - sequences = fileio.MsfIo(file).get_all_strings() 1.11 - else: 1.12 - raise Exception("We don't support other formats yet") 1.13 - for (name, description, body) in sequences: 1.14 + io = fileio.File(file, format) 1.15 + for name, description, body in io.read_strings(): 1.16 self.append_row_from_string(body, name, description, file.name, gaps) 1.17 return self 1.18 1.19 @@ -217,16 +212,11 @@ 1.20 if monomer: 1.21 return monomer.code1 1.22 return gap 1.23 - if format == 'fasta': 1.24 - io = fileio.FastaIo(file) 1.25 - elif format == 'msf': 1.26 - io = fileio.MsfIo(file) 1.27 - else: 1.28 - raise Exception("We don't support other formats yet") 1.29 + io = fileio.File(file, format) 1.30 for row in self.rows_as_lists(): 1.31 seq = row.sequence 1.32 line = "".join(map(char, row)) 1.33 - io.save_string(line, seq.name, seq.description) 1.34 + io.write_string(line, seq.name, seq.description) 1.35 1.36 # Data access methods for alignment 1.37 # =================================
2.1 --- a/allpy/fileio.py Thu Mar 24 20:45:32 2011 +0300 2.2 +++ b/allpy/fileio.py Thu Mar 24 20:46:06 2011 +0300 2.3 @@ -1,107 +1,82 @@ 2.4 import os 2.5 from tempfile import NamedTemporaryFile 2.6 - 2.7 import util 2.8 2.9 -class BaseIo(object): 2.10 - """ Base class providing alignment/sequence import and export 2.11 +class File(object): 2.12 + """Automatical file IO.""" 2.13 + def __new__(cls, file, format="fasta"): 2.14 + if format == "fasta": 2.15 + return FastaFile(file) 2.16 + else: 2.17 + return EmbossFile(file, format) 2.18 2.19 - Data: 2.20 - * file - file object 2.21 - """ 2.22 +class FastaFile(object): 2.23 + """Fasta parser & writer.""" 2.24 2.25 - def __init__(self, file): 2.26 + def __init__(self, file, wrap_column=70): 2.27 self.file = file 2.28 + self.wrap_column = wrap_column 2.29 2.30 - def save_string(self, string, name, description=''): 2.31 - """ Saves given string to file 2.32 - 2.33 - Splits long lines to substrings of length=long_line 2.34 - To prevent this, set long_line=None 2.35 - """ 2.36 - pass 2.37 - 2.38 - def get_all_strings(self): 2.39 - """Parse fasta file, remove spaces and newlines from sequence bodies. 2.40 - 2.41 - Return a list of tuples (name, description, sequence_body). 2.42 - """ 2.43 - pass 2.44 - 2.45 - def get_string(self, name): 2.46 - """ return tuple (name, description, string) for sequence with name name """ 2.47 - for name_test, description, body in self.get_all_strings(): 2.48 - if name_test == name: 2.49 - return (name_test, description, body) 2.50 - 2.51 -class FastaIo(BaseIo): 2.52 - """ Fasta import and export 2.53 - 2.54 - Additional data: 2.55 - * long_line - max length of file line while export 2.56 - Splits long lines to substrings of length=long_line 2.57 - To prevent this, set long_line=None 2.58 - """ 2.59 - 2.60 - def __init__(self, file, long_line=70): 2.61 - BaseIo.__init__(self, file) 2.62 - self.long_line = long_line 2.63 - 2.64 - def save_string(self, string, name, description=''): 2.65 + def write_string(self, string, name, description=''): 2.66 + """Append one sequence to file.""" 2.67 if description: 2.68 name += " " + description 2.69 self.file.write(">%s\n" % name) 2.70 - if self.long_line: 2.71 - for i in range(0, len(string) // self.long_line + 1): 2.72 - start = i*self.long_line 2.73 - end = i*self.long_line + self.long_line 2.74 - self.file.write("%s\n" % string[start:end]) 2.75 + if self.wrap_column: 2.76 + while string: 2.77 + self.file.write(string[:self.wrap_column]+"\n") 2.78 + string = string[self.wrap_column:] 2.79 else: 2.80 - self.file.write("%s\n" % string) 2.81 + self.file.write(string+"\n") 2.82 + self.file.flush() 2.83 2.84 - def get_all_strings(self): 2.85 + def write_strings(self, sequences): 2.86 + """Write sequences to file. 2.87 + 2.88 + Sequences are given as list of tuples (string, name, description). 2.89 + """ 2.90 + for string, name, description in sequences: 2.91 + self.write_string(string, name, desription) 2.92 + 2.93 + def read_strings(self): 2.94 for part in self.file.read().split("\n>"): 2.95 header, _, body = part.partition("\n") 2.96 - header = header.lstrip(">").strip() 2.97 + header = header.lstrip(">") 2.98 name, _, description = header.partition(" ") 2.99 name = name.strip() 2.100 description = description.strip() 2.101 body = util.remove_each(body, " \n\r\t\v") 2.102 yield (name, description, body) 2.103 2.104 - def get_string(self, name): 2.105 - for name_test, description, body in self.get_all_strings(): 2.106 - if name_test == name: 2.107 - return (name_test, description, body) 2.108 +class EmbossFile(object): 2.109 + """Parser & writer for file formats supported by EMBOSS.""" 2.110 2.111 -class MsfIo(BaseIo): 2.112 - """ Msf import and export """ 2.113 + def __init__(self, file, format): 2.114 + self.file = file 2.115 + self.format = format 2.116 2.117 - def __init__(self, file): 2.118 - BaseIo.__init__(self, file) 2.119 + def write_strings(self, sequences): 2.120 + """Write sequences to file.""" 2.121 + # XXX: in case of exceptions files are not closed, nor unlinked 2.122 + tmpfile = NamedTemporaryFile('w', delete=False) 2.123 + FastaFile(tmpfile).write_strings(self.fix_sequences(sequences)) 2.124 + tmpfile.close() 2.125 + os.system("seqret %s::%s %s" % (self.format, tmpfile, self.file.name)) 2.126 + os.unlink(tmpfile) 2.127 2.128 - def save_string(self, string, name, description=''): 2.129 - name = name.replace(':', '_') # seqret bug 2.130 - tmp_fasta = NamedTemporaryFile('w', delete=False) 2.131 - tmp_fasta.close() 2.132 - os.system("seqret %(msf)s %(fasta)s" % \ 2.133 - {'msf': self.file.name, 'fasta': tmp_fasta.name}) 2.134 - tmp_fasta = open(tmp_fasta.name, 'a') 2.135 - fasta = FastaIo(tmp_fasta) 2.136 - fasta.save_string(string, name, description) 2.137 - tmp_fasta.close() 2.138 - self.file.close() 2.139 - os.system("seqret %(fasta)s msf::%(msf)s" % \ 2.140 - {'msf': self.file.name, 'fasta': tmp_fasta.name}) 2.141 - os.unlink(tmp_fasta.name) 2.142 - self.file = open(self.file.name) 2.143 + def fix_sequences(self, sequences): 2.144 + """EMBOSS does not permit : in file names. Fix sequences for that.""" 2.145 + for name, description, sequence in sequences: 2.146 + yield name.replace(':', '_'), description, sequence 2.147 2.148 - def get_all_strings(self): 2.149 - tmp_fasta = NamedTemporaryFile(delete=False) 2.150 - os.system("seqret %(msf)s %(fasta)s" % \ 2.151 - {'msf': self.file.name, 'fasta': tmp_fasta.name}) 2.152 - fasta = FastaIo(tmp_fasta) 2.153 - strings = list(fasta.get_all_strings()) 2.154 - os.unlink(tmp_fasta.name) 2.155 - return strings 2.156 + def read_strings(self): 2.157 + """Read sequences from file.""" 2.158 + # XXX: in case of exceptions files are not closed, nor unlinked 2.159 + tmpfile = NamedTemporaryFile(delete=False) 2.160 + self.file.flush() 2.161 + os.system("seqret %s %s::%s" % (self.file.name, self.format, tmpfile)) 2.162 + sequences = FastaFile(tmpfile).read_strings() 2.163 + os.unlink(tmpfile) 2.164 + return sequences 2.165 2.166 +# vim: set et ts=4 sts=4 sw=4:
3.1 --- a/geometrical_core/geometrical-core Thu Mar 24 20:45:32 2011 +0300 3.2 +++ b/geometrical_core/geometrical-core Thu Mar 24 20:46:06 2011 +0300 3.3 @@ -104,10 +104,10 @@ 3.4 IOs = [] 3.5 if args.f: 3.6 block.to_file(args.f, format='fasta') 3.7 - IOs.append(fileio.FastaIo(args.f)) 3.8 + IOs.append(fileio.File(args.f, format='fasta')) 3.9 if args.g: 3.10 block.to_file(args.g, format='msf') 3.11 - IOs.append(fileio.MsfIo(args.g)) 3.12 + IOs.append(fileio.File(args.g, format='msf')) 3.13 for i, GC in enumerate(GCs): 3.14 for column in GC: 3.15 m[column] = True 3.16 @@ -116,7 +116,7 @@ 3.17 description = 'Main geometrical core' if i==0 \ 3.18 else 'Alternative geometrical core %i' % i 3.19 for io in IOs: 3.20 - io.save_string(string, name, description) 3.21 + io.write_string(string, name, description) 3.22 m.clear() 3.23 3.24 if args.p: