Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/afed1fd8920c
Дата изменения: Unknown
Дата индексирования: Tue Oct 2 00:39:54 2012
Кодировка:

Поисковые слова: m 31
allpy: afed1fd8920c

allpy

changeset 1091:afed1fd8920c

Added backreferences to `Sequence`s from `Monomer`s (closes #49) WARNING! Please note that the `Sequence` API has changed almost entirely! WARNING! This commit immediately obsoletes classmethods `Monomer.from_code*`, `Monomer.from_name` and `Sequence.from_monomers`. Turns out, Python cannot pickle sets/dicts which have keys that indirectly reference the set/dict itself: http://bugs.python.org/issue9269 -- which is exactly what we have in abundance after this change. To allow pickling, added `__getstate__` to `Monomer` to return all attributes except `sequence`, and `__setstate__` to `Sequence`, which runs through all monomers and puts the `sequence` attribute back where it belongs. WARNING! This MAY result in unexpected behaviour in some cases. (Which should be rare enough).
author Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru>
date Sat, 02 Jun 2012 19:33:42 +0400
parents 73f57a30831f
children 6b32ed63555a
files allpy/base.py allpy/dna.py allpy/structure.py blocks3d/blocks3d.py pair_cores/mkhtml.py pair_cores/pair_cores.py pair_cores/rasmol_output.py test/test_base.py test/test_pickle.py
diffstat 9 files changed, 103 insertions(+), 57 deletions(-) [+]
line diff
     1.1 --- a/allpy/base.py	Sat Jun 02 19:29:40 2012 +0400
     1.2 +++ b/allpy/base.py	Sat Jun 02 19:33:42 2012 +0400
     1.3 @@ -29,6 +29,9 @@
     1.4      by_name = {}
     1.5      """A mapping from full monomer name to Monomer subclass."""
     1.6  
     1.7 +    sequence = None
     1.8 +    """A sequence the monomer belongs to."""
     1.9 +
    1.10      @classmethod
    1.11      def _subclass(cls, name='', code1='', code3='', is_modified=False):
    1.12          """Create new subclass of Monomer for given monomer type."""
    1.13 @@ -67,23 +70,6 @@
    1.14          for code1, is_modified, code3, name in codes:
    1.15              cls._subclass(name, code1, code3, is_modified)
    1.16  
    1.17 -    @classmethod
    1.18 -    def from_code1(cls, code1):
    1.19 -        """Create new monomer from 1-letter code."""
    1.20 -        monomer = cls.by_code1[code1.upper()]()
    1.21 -        monomer.input_code1 = code1
    1.22 -        return monomer
    1.23 -
    1.24 -    @classmethod
    1.25 -    def from_code3(cls, code3):
    1.26 -        """Create new monomer from 3-letter code."""
    1.27 -        return cls.by_code3[code3.upper()]()
    1.28 -
    1.29 -    @classmethod
    1.30 -    def from_name(cls, name):
    1.31 -        """Create new monomer from full name."""
    1.32 -        return cls.by_name[name.strip().capitalize()]()
    1.33 -
    1.34      def __repr__(self):
    1.35          return "<Monomer %s>"  % str(self.code1)
    1.36  
    1.37 @@ -101,6 +87,30 @@
    1.38      def __ne__(self, other):
    1.39          return not (self == other)
    1.40  
    1.41 +    def __getstate__(self):
    1.42 +        """Overcome difficulties with pickle.
    1.43 +
    1.44 +        Pickle is unable to store `set`s/`dict`s that have objects referencing
    1.45 +        back the `set`/`dict` itself, which `sequence` in monomer does.
    1.46 +        ( http://bugs.python.org/issue9269 )
    1.47 +
    1.48 +        To sidestep the bug we store the monomer WITHOUT `sequence` attribute.
    1.49 +
    1.50 +        See also `Sequence.__setstate__`.
    1.51 +        """
    1.52 +        state = {}
    1.53 +        state.update(vars(self))
    1.54 +        if 'sequence' in state:
    1.55 +            del state['sequence']
    1.56 +        return state
    1.57 +
    1.58 +    def _obsolete_method(cls, *args, **kws):
    1.59 +        """OBSOLETE"""
    1.60 +        raise AttributeError("Call to obsolete method.")
    1.61 +    from_code1 = classmethod(_obsolete_method)
    1.62 +    from_code3 = classmethod(_obsolete_method)
    1.63 +    from_name = classmethod(_obsolete_method)
    1.64 +
    1.65  class MarkupContainerMixin(object):
    1.66      """Common functions for alignment and sequence for dealing with markups.
    1.67      """
    1.68 @@ -162,31 +172,45 @@
    1.69      """Description of object kind."""
    1.70  
    1.71      name = ''
    1.72 +    """Squence identifier."""
    1.73 +
    1.74      description = ''
    1.75 +    """Detailed sequence description."""
    1.76 +
    1.77      source = ''
    1.78 +    """Sequence source."""
    1.79  
    1.80 -    def __init__(self, *args):
    1.81 -        list.__init__(self, *args)
    1.82 +    def __init__(self, sequence=(), name='', description='', source=''):
    1.83 +        list.__init__(self, sequence)
    1.84          MarkupContainerMixin._init(self)
    1.85  
    1.86 -    @classmethod
    1.87 -    def from_monomers(cls, monomers=[], name=None, description=None, source=None):
    1.88 -        """Create sequence from a list of monomer objecst."""
    1.89 -        result = cls(monomers)
    1.90 -        if name:
    1.91 -            result.name = name
    1.92 -        if description:
    1.93 -            result.description = description
    1.94 -        if source:
    1.95 -            result.source = source
    1.96 -        return result
    1.97 +        self.name = name
    1.98 +        self.description = description
    1.99 +        self.source = source
   1.100 +
   1.101 +    def append_monomer(self, code1=None, code3=None, name=None):
   1.102 +        """Append a new monomer to the sequence. Return the new monomer."""
   1.103 +        assert bool(code1) + bool(code3) + bool(name) == 1, \
   1.104 +            "Please specify exactly one of: code1, code3, name"
   1.105 +        if code1:
   1.106 +            cls = self.types.Monomer.by_code1[code1.upper()]
   1.107 +        elif code3:
   1.108 +            cls = self.types.Monomer.by_code3[code3.upper()]
   1.109 +        elif name:
   1.110 +            cls = self.types.Monomer.by_name[name.strip().capitalize()]
   1.111 +        monomer = cls()
   1.112 +        monomer.sequence = self
   1.113 +        monomer.input_code1 = code1
   1.114 +        self.append(monomer)
   1.115 +        return monomer
   1.116  
   1.117      @classmethod
   1.118      def from_string(cls, string, name='', description='', source=''):
   1.119          """Create sequences from string of one-letter codes."""
   1.120 -        monomer = cls.types.Monomer.from_code1
   1.121 -        monomers = [monomer(letter) for letter in string]
   1.122 -        return cls.from_monomers(monomers, name, description, source)
   1.123 +        self = cls([], name=name, description=description, source=source)
   1.124 +        for letter in string:
   1.125 +            self.append_monomer(code1=letter)
   1.126 +        return self
   1.127  
   1.128      def __repr__(self):
   1.129          if self.name:
   1.130 @@ -202,6 +226,26 @@
   1.131          """Hash sequence by identity."""
   1.132          return id(self)
   1.133  
   1.134 +    def __setstate__(self, state):
   1.135 +        """Overcome difficulties with pickle: add `monomer.sequence` after loading.
   1.136 +
   1.137 +        Pickle is unable to store `set`s/`dict`s that have objects referencing
   1.138 +        back the `set`/`dict` itself, which `sequence` in monomer does.
   1.139 +        ( http://bugs.python.org/issue9269 )
   1.140 +
   1.141 +        To sidestep the bug we store the monomer WITHOUT `sequence` attribute.
   1.142 +
   1.143 +        See also `Monomer.__getstate__`.
   1.144 +        """
   1.145 +        vars(self).update(state)
   1.146 +        for monomer in self:
   1.147 +            monomer.sequence = self
   1.148 +
   1.149 +    @classmethod
   1.150 +    def from_monomers(cls, *args, **kws):
   1.151 +        """OBSOLETE."""
   1.152 +        raise AttributeError("Sequence.from_monomers is obsolete")
   1.153 +
   1.154  class Alignment(MarkupContainerMixin):
   1.155      """Alignment. It is a list of Columns."""
   1.156  
     2.1 --- a/allpy/dna.py	Sat Jun 02 19:29:40 2012 +0400
     2.2 +++ b/allpy/dna.py	Sat Jun 02 19:33:42 2012 +0400
     2.3 @@ -20,19 +20,15 @@
     2.4  
     2.5          Name of the sequence is name of self with apostrophe added.
     2.6          """
     2.7 -        from_monomers = self.types.Sequence.from_monomers
     2.8 -        from_code1 = self.types.Monomer.from_code1
     2.9          complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    2.10 -        complemented_monomers = [
    2.11 -            from_code1(complement.get(monomer.code1, 'N'))
    2.12 -            for monomer in self
    2.13 -        ]
    2.14 -        return from_monomers(
    2.15 -            reversed(complemented_monomers),
    2.16 -            self.name + "'",
    2.17 -            self.description,
    2.18 -            self.source
    2.19 +        result = self.types.Sequence(
    2.20 +            name=self.name +"'",
    2.21 +            description=self.description,
    2.22 +            source=self.source
    2.23          )
    2.24 +        for monomer in reversed(self):
    2.25 +            result.append_monomer(complement.get(monomer.code1, 'N'))
    2.26 +        return result
    2.27  
    2.28  class Column(base.Column):
    2.29      types = dna
     3.1 --- a/allpy/structure.py	Sat Jun 02 19:29:40 2012 +0400
     3.2 +++ b/allpy/structure.py	Sat Jun 02 19:33:42 2012 +0400
     3.3 @@ -217,15 +217,13 @@
     3.4          cappbuilder = CaPPBuilder()
     3.5          peptides = cappbuilder.build_peptides(chain)
     3.6          Sequence = cls
     3.7 -        Monomer = Sequence.types.Monomer
     3.8          sequence = Sequence()
     3.9          sequence.pdb_chain = chain
    3.10          for peptide in peptides:
    3.11              for ca_atom in peptide.get_ca_list():
    3.12                  residue = ca_atom.get_parent()
    3.13 -                monomer = Monomer.from_code3(residue.get_resname())
    3.14 +                monomer = sequence.append_monomer(code3=residue.get_resname())
    3.15                  monomer.pdb_residue = residue
    3.16 -                sequence.append(monomer)
    3.17          return sequence
    3.18  
    3.19      def auto_pdb(self, conformity=None, pdb_getter=download_pdb, xyz_only=False,
     4.1 --- a/blocks3d/blocks3d.py	Sat Jun 02 19:29:40 2012 +0400
     4.2 +++ b/blocks3d/blocks3d.py	Sat Jun 02 19:33:42 2012 +0400
     4.3 @@ -12,7 +12,7 @@
     4.4  from allpy.argparse_validators import f_nng, part, timeout, pos, i_nng
     4.5  from allpy.structure import CachedDownloadPdb
     4.6  
     4.7 -from protein_pdb import Alignment, Block, Monomer, Sequence
     4.8 +from protein_pdb import Alignment, Block, Sequence
     4.9  from html import html_template
    4.10  
    4.11  r = argparse.FileType('r')
     5.1 --- a/pair_cores/mkhtml.py	Sat Jun 02 19:29:40 2012 +0400
     5.2 +++ b/pair_cores/mkhtml.py	Sat Jun 02 19:33:42 2012 +0400
     5.3 @@ -4,7 +4,7 @@
     5.4  import json
     5.5  import sys
     5.6  
     5.7 -from protein_pdb import Alignment, Block, Monomer, Sequence
     5.8 +from protein_pdb import Alignment, Block, Sequence
     5.9  from html import html_template
    5.10  
    5.11  fasta_file = sys.argv[1]
     6.1 --- a/pair_cores/pair_cores.py	Sat Jun 02 19:29:40 2012 +0400
     6.2 +++ b/pair_cores/pair_cores.py	Sat Jun 02 19:33:42 2012 +0400
     6.3 @@ -9,7 +9,7 @@
     6.4  
     6.5  from allpy import config
     6.6  from allpy.argparse_validators import timeout
     6.7 -from protein_pdb import Alignment, Block, Monomer, Sequence
     6.8 +from protein_pdb import Alignment, Block, Sequence
     6.9  from allpy import processors, markups
    6.10  import allpy.base
    6.11  from html import html_template
     7.1 --- a/pair_cores/rasmol_output.py	Sat Jun 02 19:29:40 2012 +0400
     7.2 +++ b/pair_cores/rasmol_output.py	Sat Jun 02 19:33:42 2012 +0400
     7.3 @@ -4,7 +4,7 @@
     7.4  from gzip import GzipFile
     7.5  from copy import copy
     7.6  
     7.7 -from protein_pdb import Alignment, Block, Monomer, Sequence
     7.8 +from protein_pdb import Alignment, Block, Sequence
     7.9  from allpy import structure
    7.10  
    7.11  def load(pdb_code):
     8.1 --- a/test/test_base.py	Sat Jun 02 19:29:40 2012 +0400
     8.2 +++ b/test/test_base.py	Sat Jun 02 19:33:42 2012 +0400
     8.3 @@ -1,7 +1,7 @@
     8.4  import re
     8.5  from StringIO import StringIO
     8.6  
     8.7 -import allpy.base as b
     8.8 +import allpy.base
     8.9  import allpy.protein as p
    8.10  from allpy import processors
    8.11  
    8.12 @@ -16,21 +16,28 @@
    8.13  
    8.14  def test_new_monomers():
    8.15      """Test creation of monomer objects"""
    8.16 +    s = allpy.base.Sequence()
    8.17  
    8.18      try:
    8.19 -        m = b.Monomer.from_code1('A')
    8.20 +        m = s.append_monomer(code1='A')
    8.21      except Exception:
    8.22          pass
    8.23      else:
    8.24 -        assert False, "base.Monomers must not be constructible from code1"
    8.25 +        assert False, "base.Seqeuence must not be constructible from code1"
    8.26  
    8.27 -    m = b.Monomer.from_code3('ALA')
    8.28 +    m = p.Sequence().append_monomer(code3='ALA')
    8.29      assert m.__class__.__name__ == "Alanine"
    8.30      assert m.code1 == "A"
    8.31      assert m.code3 == "ALA"
    8.32      assert m.name == "Alanine"
    8.33  
    8.34 -    m = b.Monomer.from_name("alaNINE")
    8.35 +    m = s.append_monomer(code3='ALA')
    8.36 +    assert m.__class__.__name__ == "Alanine"
    8.37 +    assert m.code1 == "A"
    8.38 +    assert m.code3 == "ALA"
    8.39 +    assert m.name == "Alanine"
    8.40 +
    8.41 +    m = s.append_monomer(name="alaNINE")
    8.42      assert m.__class__.__name__ == "Alanine"
    8.43      assert m.code1 == "A"
    8.44      assert m.code3 == "ALA"
    8.45 @@ -38,7 +45,7 @@
    8.46  
    8.47      # This actually tests for proper overriding when there are many monomers
    8.48      # with the same code3. The unmodified one takes precedence.
    8.49 -    m = b.Monomer.from_code3('SEC')
    8.50 +    m = s.append_monomer(code3='SEC')
    8.51      assert m.name == "Selenocysteine"
    8.52  
    8.53  def assert_alignment(alignment, *body):
     9.1 --- a/test/test_pickle.py	Sat Jun 02 19:29:40 2012 +0400
     9.2 +++ b/test/test_pickle.py	Sat Jun 02 19:33:42 2012 +0400
     9.3 @@ -18,5 +18,6 @@
     9.4      s = pickle.load(file)
     9.5      assert s.name == 'sequence'
     9.6      assert str(s) == 'SEQVENCE'
     9.7 +    assert s[0].sequence is s
     9.8  
     9.9  # vim: set et ts=4 sts=4 sw=4: