Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/80043822a41e/allpy/fileio.py
Дата изменения: Unknown
Дата индексирования: Mon Feb 4 03:28:34 2013
Кодировка:
allpy: 80043822a41e allpy/fileio.py

allpy

view allpy/fileio.py @ 704:80043822a41e

Added fileio.File.read_alignment and fileio.File.write_alignment [closes #57]. This is now the recommended interface. In the following commits this interface will be used to write markup together with the alignment. Previously, fileio received the alignment already torn apart into sequence representations as strings with gaps, sequence names and descriptions. Now, fileio tears the alignment apart by itself.
author Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date Thu, 07 Jul 2011 19:21:12 +0400
parents 69079d72d207
children 21cfc7897a8f
line source
1 import os
2 from subprocess import Popen, PIPE
3 from tempfile import NamedTemporaryFile
4 import util
class File(object):
    """Factory that picks the file handler matching the requested format.

    Instantiating ``File(file, format, **kw)`` never produces a ``File``
    instance: ``__new__`` hands back a ``FastaFile`` for the "fasta" format
    and an ``EmbossFile`` for every other format name.
    """

    def __new__(cls, file, format="fasta", **kw):
        """Return the handler object for `format`.

        file -- file object passed through to the handler
        format -- format name; anything but "fasta" is delegated to EMBOSS
        kw -- extra keyword arguments forwarded to the handler constructor
        """
        if format != "fasta":
            return EmbossFile(file, format, **kw)
        return FastaFile(file, **kw)
class AlignmentFile(object):
    """Helpers shared by the concrete alignment file handlers.

    This class converts between an alignment object and plain tuples of
    strings. The actual reading and writing is done by `read_strings()` and
    `write_strings()`, which are not defined here and must be supplied by
    subclasses.
    """

    def __init__(self, file, format='fasta', gaps='-', wrap_column=70):
        """Store the file object and the formatting options.

        file -- file object to read from or write to
        format -- format name
        gaps -- gap specification handed to the alignment when rows are
            converted to or from strings
        wrap_column -- stored as-is; not used by this class itself
        """
        self.file = file
        self.format = format
        self.gaps = gaps
        self.wrap_column = wrap_column

    def write_alignment(self, alignment):
        """Append alignment to the file.

        Every row returned by `alignment.rows_as_strings(self.gaps)` is
        passed to `write_strings()` as a `(string, name, description)`
        tuple, with name and description taken from `row.sequence`.
        """
        self.write_strings(
            (row, row.sequence.name, row.sequence.description)
            for row in alignment.rows_as_strings(self.gaps)
        )

    def read_alignment(self, alignment):
        """Read alignment from the file.

        Every `(name, description, body)` tuple produced by
        `read_strings()` is added to `alignment` through its
        `append_row_from_string` method, together with the name of the
        underlying file object and the gap specification.
        """
        # The name must come from *our* file object, not from the builtin
        # `file` type. File-like objects without a `name` attribute
        # (e.g. in-memory buffers) are reported as None.
        source = getattr(self.file, "name", None)
        append_row = alignment.append_row_from_string
        for name, description, body in self.read_strings():
            append_row(body, name, description, source, self.gaps)
class FastaFile(AlignmentFile):
    """Fasta parser & writer."""

    def write_string(self, string, name, description=''):
        """Append one sequence to file.

        string -- sequence body to write
        name -- sequence name; written right after ">" on the header line
        description -- optional text appended to the header after a space

        When `self.wrap_column` is truthy the body is split into lines of
        at most that many characters; otherwise it is written as one line.
        The file is flushed after each sequence.
        """
        header = name
        if description:
            header += " " + description
        self.file.write(">%s\n" % header)
        width = self.wrap_column
        if width:
            # Step through the body by index instead of re-slicing the
            # remainder on every iteration.
            for start in range(0, len(string), width):
                self.file.write(string[start:start + width] + "\n")
        else:
            self.file.write(string + "\n")
        self.file.flush()

    def write_strings(self, sequences):
        """Write sequences to file.

        Sequences are given as an iterable of tuples
        (string, name, description).
        """
        for string, name, description in sequences:
            self.write_string(string, name, description)

    def read_strings(self):
        """Parse the whole file and yield (name, description, body) tuples.

        The header line is split at the first space into name and
        description; whitespace is removed from the body.
        """
        for part in self.file.read().split("\n>"):
            # An empty file, or blank lines before the first ">", produce
            # a blank chunk that is not a sequence record; skip it instead
            # of yielding an empty ('', '', '') row.
            if not part.strip():
                continue
            header, _, body = part.partition("\n")
            # Only the very first record still carries its leading ">".
            header = header.lstrip(">")
            name, _, description = header.partition(" ")
            name = name.strip()
            description = description.strip()
            body = util.remove_each(body, " \n\r\t\v")
            yield (name, description, body)
class EmbossFile(AlignmentFile):
    """Parser & writer for file formats supported by EMBOSS.

    All conversion is delegated to the external `seqret` program, which is
    fed FASTA on one side and `self.format` on the other.
    """

    def write_strings(self, sequences):
        """Write sequences to file.

        Sequences are given as an iterable of tuples
        (string, name, description). They are piped to `seqret` as FASTA
        and its output in `self.format` is copied to `self.file`.
        """
        pipe = Popen(['seqret', 'stdin', '%s::stdout' % self.format],
            stdin=PIPE, stdout=PIPE
        )
        # NOTE(review): all input is written before any output is read, so
        # a very large alignment could fill the OS pipe buffer and block.
        try:
            FastaFile(pipe.stdin).write_strings(self.fix_sequences(sequences))
            pipe.stdin.close()
            for line in pipe.stdout:
                self.file.write(line)
        finally:
            # Release the pipes and reap the child so it does not linger
            # as a zombie process.
            pipe.stdin.close()
            pipe.stdout.close()
            pipe.wait()

    def fix_sequences(self, sequences):
        """EMBOSS does not permit : in names. Fix sequences for that.

        Takes and yields (string, name, description) tuples; only the name
        is changed, with every ":" replaced by "_".
        """
        # The tuple order must match what FastaFile.write_strings unpacks,
        # otherwise the replacement hits the sequence body, not the name.
        for string, name, description in sequences:
            yield string, name.replace(':', '_'), description

    def read_strings(self):
        """Read sequences from file.

        The contents of `self.file` are piped through `seqret` to convert
        them from `self.format` to FASTA. Returns an iterator of
        (name, description, body) tuples.
        """
        pipe = Popen(['seqret', '%s::stdin' % self.format, 'stdout'],
            stdin=PIPE, stdout=PIPE
        )
        # NOTE(review): all input is written before any output is read, so
        # a very large file could fill the OS pipe buffer and block.
        try:
            for line in self.file:
                pipe.stdin.write(line)
            pipe.stdin.close()
            # Consume the output now so the pipe can be closed and the
            # child reaped before returning.
            records = list(FastaFile(pipe.stdout).read_strings())
        finally:
            pipe.stdin.close()
            pipe.stdout.close()
            pipe.wait()
        return iter(records)
98 # vim: set et ts=4 sts=4 sw=4: