allpy: 88c246f20918 allpy/fileio.py

allpy

view allpy/fileio.py @ 645:88c246f20918

Fixed monomer pickling to avoid name clashes. This breaks pickle backwards-compatibility! [see #35] Previously all monomer classes were stored in a single namespace, allpy.data.monomers. This caused a few name clashes, which were mostly resolved, and one name clash that was not. (This caused one class to be named differently depending on the order in which modules were loaded.) Now, instead of one allpy.data.monomers module, we have an allpy.data.monomers package with modules dna, rna, protein. This ensures that all name clashes are resolved uniformly for any order of module loading. This may also help in the future to keep backward compatibility longer in case we replace dynamic monomer class creation with storing the classes in the module - if we want to retain independent loading of the dna/rna/protein parts.

author	Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date	Wed, 08 Jun 2011 21:31:02 +0400
parents	b6a53615f1e9
children	80043822a41e

line source

1 import os

2 from subprocess import Popen, PIPE

3 from tempfile import NamedTemporaryFile

4 import util

class File(object):
    """Factory that picks a sequence reader/writer for a given format.

    Instantiating ``File`` never produces a ``File`` object: ``__new__``
    hands back a ``FastaFile`` when `format` is ``"fasta"`` and an
    ``EmbossFile`` for every other format name.
    """

    def __new__(cls, file, format="fasta"):
        # Only FASTA is handled natively; any other format name is
        # forwarded to the EMBOSS-backed implementation together with
        # the format itself.
        if format != "fasta":
            return EmbossFile(file, format)
        return FastaFile(file)

class FastaFile(object):
    """Fasta parser & writer.

    Wraps an already opened file-like object. Writing uses its ``write``
    and ``flush`` methods, reading uses ``read``.

    Note that the tuple layouts differ between the two directions:
    ``write_strings`` consumes ``(string, name, description)`` while
    ``read_strings`` produces ``(name, description, body)``.
    """

    def __init__(self, file, wrap_column=70):
        """Remember the target file and the line width for sequence bodies.

        `wrap_column` is the maximum number of sequence characters per
        line; a false or non-positive value disables wrapping.
        """
        self.file = file
        self.wrap_column = wrap_column

    def write_string(self, string, name, description=''):
        """Append one sequence to file.

        The header line is ``>name`` optionally followed by a space and
        `description`. The file is flushed after the record is written.
        """
        if description:
            name += " " + description
        self.file.write(">%s\n" % name)
        width = self.wrap_column
        if width and width > 0:
            # Slice by offset instead of repeatedly cutting the head off the
            # remaining string: that copied the whole tail for every output
            # line (quadratic for long sequences) and never terminated for a
            # negative width.
            for start in range(0, len(string), width):
                self.file.write(string[start:start + width] + "\n")
        else:
            self.file.write(string + "\n")
        self.file.flush()

    def write_strings(self, sequences):
        """Write sequences to file.

        Sequences are given as list of tuples (string, name, description).
        """
        for string, name, description in sequences:
            self.write_string(string, name, description)

    def read_strings(self):
        """Iterate over the records of the file.

        Reads the whole file at once and yields one
        ``(name, description, body)`` tuple per record. The name is the
        header text up to the first space, the description is the rest of
        the header line, and the body is passed through
        ``util.remove_each`` together with the whitespace characters
        (presumably deleting them -- the helper is defined elsewhere).
        """
        for part in self.file.read().split("\n>"):
            if not part.strip():
                # An empty file, or blank lines before the first header,
                # produce a whitespace-only chunk; that is not a record.
                continue
            header, _, body = part.partition("\n")
            # Only the very first chunk still carries its ">" marker; the
            # others lost it to the split above.
            header = header.lstrip(">")
            name, _, description = header.partition(" ")
            name = name.strip()
            description = description.strip()
            body = util.remove_each(body, " \n\r\t\v")
            yield (name, description, body)

class EmbossFile(object):
    """Parser & writer for file formats supported by EMBOSS.

    Conversion is delegated to the external ``seqret`` program, with FASTA
    as the intermediate format on our side of the pipe.
    """

    def __init__(self, file, format):
        """Remember the file-like object and the EMBOSS format name."""
        self.file = file
        self.format = format

    def write_strings(self, sequences):
        """Write sequences to file.

        Sequences are (string, name, description) tuples, the same layout
        ``FastaFile.write_strings`` accepts. They are rendered as FASTA,
        converted by ``seqret`` and the result is copied to ``self.file``.
        """
        # The FASTA text is spooled to a temporary file instead of being
        # pushed through a pipe: writing all input before reading any output
        # deadlocks once both pipe buffers are full.
        spool = NamedTemporaryFile(mode='w+')
        try:
            FastaFile(spool).write_strings(self.fix_sequences(sequences))
            pipe = self._seqret(['stdin', '%s::stdout' % self.format], spool)
            try:
                for line in pipe.stdout:
                    self.file.write(line)
            finally:
                # Always release the pipe and reap the child process.
                pipe.stdout.close()
                pipe.wait()
        finally:
            spool.close()

    def fix_sequences(self, sequences):
        """EMBOSS does not permit : in file names. Fix sequences for that.

        Takes and yields (string, name, description) tuples; only the name
        is altered, every ':' in it becomes '_'.
        """
        for string, name, description in sequences:
            yield string, name.replace(':', '_'), description

    def read_strings(self):
        """Read sequences from file.

        Returns an iterator over (name, description, body) tuples, as
        produced by ``FastaFile.read_strings`` from the ``seqret`` output.
        """
        # Same spooling as in write_strings, for the same deadlock reason.
        spool = NamedTemporaryFile(mode='w+')
        try:
            for line in self.file:
                spool.write(line)
            pipe = self._seqret(['%s::stdin' % self.format, 'stdout'], spool)
            try:
                # Consume the output now, so the pipe can be closed and the
                # child reaped before we return.
                records = list(FastaFile(pipe.stdout).read_strings())
            finally:
                pipe.stdout.close()
                pipe.wait()
        finally:
            spool.close()
        return iter(records)

    def _seqret(self, arguments, spool):
        """Start seqret with `arguments`, reading its stdin from `spool`."""
        # Make sure everything written so far is on disk and that the child
        # starts reading from the beginning of the file.
        spool.flush()
        spool.seek(0)
        return Popen(['seqret'] + arguments, stdin=spool, stdout=PIPE)

84 # vim: set et ts=4 sts=4 sw=4: