Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/file/d2b7e6a16be6/allpy/fileio.py
Дата изменения: Unknown
Дата индексирования: Mon Feb 4 03:40:52 2013
Кодировка:
allpy: d2b7e6a16be6 allpy/fileio.py

allpy

view allpy/fileio.py @ 569:d2b7e6a16be6

processors: add no_gaps parameter to Muscle processor; muscle might use some information from input alignment when it contains gaps
author boris (kodomo) <bnagaev@gmail.com>
date Thu, 17 Mar 2011 16:56:48 +0300
parents 5dfb9b9761d5
children ca394f2298e5
line source
1 import os
2 from tempfile import NamedTemporaryFile
4 import util
class BaseIo(object):
    """ Base class providing alignment/sequence import and export

    Data:
    * file - file object

    save_string and get_all_strings are stubs here (they do nothing and
    return None); format-specific subclasses are expected to override them.
    """

    def __init__(self, file):
        self.file = file

    def save_string(self, string, name, description=''):
        """ Saves given string to file

        Arguments:
        * string - sequence body to store
        * name - sequence name
        * description - optional free-text description of the sequence

        The base implementation is a no-op.
        """
        pass

    def get_all_strings(self):
        """ Return an iterable of tuples (name, description, sequence_body)

        The base implementation is a no-op and returns None.
        """
        pass

    def get_string(self, name):
        """ return tuple (name, description, string) for sequence with name name

        Only the first matching record is returned.
        Returns None if there is no sequence with such name.
        """
        # The stub get_all_strings() returns None; treat that as "no records"
        # instead of failing with TypeError while iterating over None.
        for name_test, description, body in self.get_all_strings() or ():
            if name_test == name:
                return (name_test, description, body)
        return None
class FastaIo(BaseIo):
    """ Fasta import and export

    Additional data:
    * long_line - max length of file line while export
        Splits long lines to substrings of length=long_line
        To prevent this, set long_line=None

    get_string is inherited from BaseIo (it scans get_all_strings).
    """

    def __init__(self, file, long_line=70):
        BaseIo.__init__(self, file)
        self.long_line = long_line

    def save_string(self, string, name, description=''):
        """ Write one fasta record (header line + sequence body) to file

        Header is ">name" or ">name description" when description is given.
        Body is split into lines of at most long_line characters,
        or written as one line when long_line is None (or 0).
        """
        if description:
            name += " " + description
        self.file.write(">%s\n" % name)
        if self.long_line:
            # Step through the body in long_line-sized chunks. Stepping by
            # offset (rather than counting len // long_line + 1 chunks) avoids
            # a spurious empty line when the length is an exact multiple of
            # long_line. max(..., 1) keeps the single empty body line that
            # is written for an empty sequence, same as the unsplit branch.
            for start in range(0, max(len(string), 1), self.long_line):
                self.file.write("%s\n" % string[start:start + self.long_line])
        else:
            self.file.write("%s\n" % string)

    def get_all_strings(self):
        """ Parse fasta file, remove whitespace from sequence bodies

        Generator yielding tuples (name, description, sequence_body).
        Name is the first space-delimited word of the header line,
        description is the rest of the header line.
        Reads the whole file into memory at once.
        """
        for part in self.file.read().split("\n>"):
            header, _, body = part.partition("\n")
            header = header.lstrip(">").strip()
            name, _, description = header.partition(" ")
            name = name.strip()
            description = description.strip()
            body = util.remove_each(body, " \n\r\t\v")
            if not name and not body:
                # Nothing but whitespace (empty file, or blank lines before
                # the first '>'): this is not a record, do not report it.
                continue
            yield (name, description, body)
class MsfIo(BaseIo):
    """ Msf import and export

    Conversion between msf and fasta is delegated to the external
    `seqret` program, which is given file names. Therefore self.file
    must be a real file on disk with a usable .name attribute.
    """

    def __init__(self, file):
        BaseIo.__init__(self, file)

    @staticmethod
    def _seqret(source, target):
        """ Run `seqret source target` and return its exit status

        Arguments are passed as a list, without a shell, so file names
        with spaces or shell metacharacters are passed through intact.
        Non-zero exit status is returned, not raised.
        Raises OSError if the seqret executable can not be started.
        """
        import subprocess  # local import: not imported at the top of the file
        return subprocess.call(["seqret", source, target])

    def save_string(self, string, name, description=''):
        """ Append one sequence to the msf file

        Existing content of the msf file (if any) is converted to a temporary
        fasta file, the new record is appended to it, and the result is
        converted back to msf, overwriting self.file.name.

        Side effect: self.file is closed and replaced with a new file object
        opened on the same name in the default (read) mode.
        """
        name = name.replace(':', '_') # seqret bug
        # Only the name of the temporary file is needed: seqret and the
        # append below open it themselves.
        tmp_fasta = NamedTemporaryFile('w', delete=False)
        tmp_fasta.close()
        tmp_name = tmp_fasta.name
        try:
            if os.path.getsize(self.file.name):
                self._seqret(self.file.name, tmp_name)
            with open(tmp_name, 'a') as fasta_file:
                FastaIo(fasta_file).save_string(string, name, description)
            self.file.close()
            try:
                self._seqret(tmp_name, "msf::" + self.file.name)
            finally:
                # Keep self.file usable even if the conversion failed.
                self.file = open(self.file.name)
        finally:
            # Never leave the temporary file behind, even on errors.
            os.unlink(tmp_name)

    def get_all_strings(self):
        """ Return a list of tuples (name, description, sequence_body)

        The msf file is converted to a temporary fasta file by seqret
        and that file is parsed with FastaIo.
        """
        tmp_fasta = NamedTemporaryFile(delete=False)
        # Close our handle right away: seqret writes the file by name, and
        # it is reopened below in text mode for parsing.
        tmp_fasta.close()
        try:
            self._seqret(self.file.name, tmp_fasta.name)
            with open(tmp_fasta.name) as fasta_file:
                return list(FastaIo(fasta_file).get_all_strings())
        finally:
            os.unlink(tmp_fasta.name)