allpy
changeset 1091:afed1fd8920c
Added backreferences to `Sequence`s from `Monomer`s (closes #49)
WARNING! Please note that the `Sequence` API has changed almost entirely!
WARNING! This commit immediately obsoletes classmethods `Monomer.from_code*`,
`Monomer.from_name` and `Sequence.from_monomers`.
It turns out that Python cannot pickle sets/dicts whose keys indirectly
reference the set/dict itself: http://bugs.python.org/issue9269 -- which is
exactly what we have in abundance after this change.
To allow pickling, added `__getstate__` to `Monomer`, which returns all
attributes except `sequence`, and `__setstate__` to `Sequence`, which runs
through all monomers and puts the `sequence` attribute back where it belongs.
WARNING! This MAY result in unexpected behaviour in some cases. (Which should
be rare enough).
author | Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru> |
---|---|
date | Sat, 02 Jun 2012 19:33:42 +0400 |
parents | 73f57a30831f |
children | 6b32ed63555a |
files | allpy/base.py allpy/dna.py allpy/structure.py blocks3d/blocks3d.py pair_cores/mkhtml.py pair_cores/pair_cores.py pair_cores/rasmol_output.py test/test_base.py test/test_pickle.py |
diffstat | 9 files changed, 103 insertions(+), 57 deletions(-) [+] |
line diff
1.1 --- a/allpy/base.py Sat Jun 02 19:29:40 2012 +0400 1.2 +++ b/allpy/base.py Sat Jun 02 19:33:42 2012 +0400 1.3 @@ -29,6 +29,9 @@ 1.4 by_name = {} 1.5 """A mapping from full monomer name to Monomer subclass.""" 1.6 1.7 + sequence = None 1.8 + """A sequence the monomer belongs to.""" 1.9 + 1.10 @classmethod 1.11 def _subclass(cls, name='', code1='', code3='', is_modified=False): 1.12 """Create new subclass of Monomer for given monomer type.""" 1.13 @@ -67,23 +70,6 @@ 1.14 for code1, is_modified, code3, name in codes: 1.15 cls._subclass(name, code1, code3, is_modified) 1.16 1.17 - @classmethod 1.18 - def from_code1(cls, code1): 1.19 - """Create new monomer from 1-letter code.""" 1.20 - monomer = cls.by_code1[code1.upper()]() 1.21 - monomer.input_code1 = code1 1.22 - return monomer 1.23 - 1.24 - @classmethod 1.25 - def from_code3(cls, code3): 1.26 - """Create new monomer from 3-letter code.""" 1.27 - return cls.by_code3[code3.upper()]() 1.28 - 1.29 - @classmethod 1.30 - def from_name(cls, name): 1.31 - """Create new monomer from full name.""" 1.32 - return cls.by_name[name.strip().capitalize()]() 1.33 - 1.34 def __repr__(self): 1.35 return "<Monomer %s>" % str(self.code1) 1.36 1.37 @@ -101,6 +87,30 @@ 1.38 def __ne__(self, other): 1.39 return not (self == other) 1.40 1.41 + def __getstate__(self): 1.42 + """Overcome difficulties with pickle. 1.43 + 1.44 + Pickle is unable to store `set`s/`dict`s that have objects referencing 1.45 + back the `set`/`dict` itself, which `sequence` in monomer does. 1.46 + ( http://bugs.python.org/issue9269 ) 1.47 + 1.48 + To sidestep the bug we store the monomer WITHOUT `sequence` attribute. 1.49 + 1.50 + See also `Sequence.__setstate__`. 
1.51 + """ 1.52 + state = {} 1.53 + state.update(vars(self)) 1.54 + if 'sequence' in state: 1.55 + del state['sequence'] 1.56 + return state 1.57 + 1.58 + def _obsolete_method(cls, *args, **kws): 1.59 + """OBSOLETE""" 1.60 + raise AttributeError("Call to obsolete method.") 1.61 + from_code1 = classmethod(_obsolete_method) 1.62 + from_code3 = classmethod(_obsolete_method) 1.63 + from_name = classmethod(_obsolete_method) 1.64 + 1.65 class MarkupContainerMixin(object): 1.66 """Common functions for alignment and sequence for dealing with markups. 1.67 """ 1.68 @@ -162,31 +172,45 @@ 1.69 """Description of object kind.""" 1.70 1.71 name = '' 1.72 + """Squence identifier.""" 1.73 + 1.74 description = '' 1.75 + """Detailed sequence description.""" 1.76 + 1.77 source = '' 1.78 + """Sequence source.""" 1.79 1.80 - def __init__(self, *args): 1.81 - list.__init__(self, *args) 1.82 + def __init__(self, sequence=(), name='', description='', source=''): 1.83 + list.__init__(self, sequence) 1.84 MarkupContainerMixin._init(self) 1.85 1.86 - @classmethod 1.87 - def from_monomers(cls, monomers=[], name=None, description=None, source=None): 1.88 - """Create sequence from a list of monomer objecst.""" 1.89 - result = cls(monomers) 1.90 - if name: 1.91 - result.name = name 1.92 - if description: 1.93 - result.description = description 1.94 - if source: 1.95 - result.source = source 1.96 - return result 1.97 + self.name = name 1.98 + self.description = description 1.99 + self.source = source 1.100 + 1.101 + def append_monomer(self, code1=None, code3=None, name=None): 1.102 + """Append a new monomer to the sequence. 
Return the new monomer.""" 1.103 + assert bool(code1) + bool(code3) + bool(name) == 1, \ 1.104 + "Please specify exactly one of: code1, code3, name" 1.105 + if code1: 1.106 + cls = self.types.Monomer.by_code1[code1.upper()] 1.107 + elif code3: 1.108 + cls = self.types.Monomer.by_code3[code3.upper()] 1.109 + elif name: 1.110 + cls = self.types.Monomer.by_name[name.strip().capitalize()] 1.111 + monomer = cls() 1.112 + monomer.sequence = self 1.113 + monomer.input_code1 = code1 1.114 + self.append(monomer) 1.115 + return monomer 1.116 1.117 @classmethod 1.118 def from_string(cls, string, name='', description='', source=''): 1.119 """Create sequences from string of one-letter codes.""" 1.120 - monomer = cls.types.Monomer.from_code1 1.121 - monomers = [monomer(letter) for letter in string] 1.122 - return cls.from_monomers(monomers, name, description, source) 1.123 + self = cls([], name=name, description=description, source=source) 1.124 + for letter in string: 1.125 + self.append_monomer(code1=letter) 1.126 + return self 1.127 1.128 def __repr__(self): 1.129 if self.name: 1.130 @@ -202,6 +226,26 @@ 1.131 """Hash sequence by identity.""" 1.132 return id(self) 1.133 1.134 + def __setstate__(self, state): 1.135 + """Overcome difficulties with pickle: add `monomer.sequence` after loading. 1.136 + 1.137 + Pickle is unable to store `set`s/`dict`s that have objects referencing 1.138 + back the `set`/`dict` itself, which `sequence` in monomer does. 1.139 + ( http://bugs.python.org/issue9269 ) 1.140 + 1.141 + To sidestep the bug we store the monomer WITHOUT `sequence` attribute. 1.142 + 1.143 + See also `Monomer.__getstate__`. 1.144 + """ 1.145 + vars(self).update(state) 1.146 + for monomer in self: 1.147 + monomer.sequence = self 1.148 + 1.149 + @classmethod 1.150 + def from_monomers(cls, *args, **kws): 1.151 + """OBSOLETE.""" 1.152 + raise AttributeError("Sequence.from_monomers is obsolete") 1.153 + 1.154 class Alignment(MarkupContainerMixin): 1.155 """Alignment. 
It is a list of Columns.""" 1.156
2.1 --- a/allpy/dna.py Sat Jun 02 19:29:40 2012 +0400 2.2 +++ b/allpy/dna.py Sat Jun 02 19:33:42 2012 +0400 2.3 @@ -20,19 +20,15 @@ 2.4 2.5 Name of the sequence is name of self with apostrophe added. 2.6 """ 2.7 - from_monomers = self.types.Sequence.from_monomers 2.8 - from_code1 = self.types.Monomer.from_code1 2.9 complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'} 2.10 - complemented_monomers = [ 2.11 - from_code1(complement.get(monomer.code1, 'N')) 2.12 - for monomer in self 2.13 - ] 2.14 - return from_monomers( 2.15 - reversed(complemented_monomers), 2.16 - self.name + "'", 2.17 - self.description, 2.18 - self.source 2.19 + result = self.types.Sequence( 2.20 + name=self.name +"'", 2.21 + description=self.description, 2.22 + source=self.source 2.23 ) 2.24 + for monomer in reversed(self): 2.25 + result.append_monomer(complement.get(monomer.code1, 'N')) 2.26 + return result 2.27 2.28 class Column(base.Column): 2.29 types = dna
3.1 --- a/allpy/structure.py Sat Jun 02 19:29:40 2012 +0400 3.2 +++ b/allpy/structure.py Sat Jun 02 19:33:42 2012 +0400 3.3 @@ -217,15 +217,13 @@ 3.4 cappbuilder = CaPPBuilder() 3.5 peptides = cappbuilder.build_peptides(chain) 3.6 Sequence = cls 3.7 - Monomer = Sequence.types.Monomer 3.8 sequence = Sequence() 3.9 sequence.pdb_chain = chain 3.10 for peptide in peptides: 3.11 for ca_atom in peptide.get_ca_list(): 3.12 residue = ca_atom.get_parent() 3.13 - monomer = Monomer.from_code3(residue.get_resname()) 3.14 + monomer = sequence.append_monomer(code3=residue.get_resname()) 3.15 monomer.pdb_residue = residue 3.16 - sequence.append(monomer) 3.17 return sequence 3.18 3.19 def auto_pdb(self, conformity=None, pdb_getter=download_pdb, xyz_only=False,
4.1 --- a/blocks3d/blocks3d.py Sat Jun 02 19:29:40 2012 +0400 4.2 +++ b/blocks3d/blocks3d.py Sat Jun 02 19:33:42 2012 +0400 4.3 @@ -12,7 +12,7 @@ 4.4 from allpy.argparse_validators import f_nng, part, timeout, pos, i_nng 4.5 from allpy.structure import CachedDownloadPdb 4.6 4.7 -from protein_pdb import Alignment, Block, Monomer, Sequence 4.8 +from protein_pdb import Alignment, Block, Sequence 4.9 from html import html_template 4.10 4.11 r = argparse.FileType('r')
5.1 --- a/pair_cores/mkhtml.py Sat Jun 02 19:29:40 2012 +0400 5.2 +++ b/pair_cores/mkhtml.py Sat Jun 02 19:33:42 2012 +0400 5.3 @@ -4,7 +4,7 @@ 5.4 import json 5.5 import sys 5.6 5.7 -from protein_pdb import Alignment, Block, Monomer, Sequence 5.8 +from protein_pdb import Alignment, Block, Sequence 5.9 from html import html_template 5.10 5.11 fasta_file = sys.argv[1]
6.1 --- a/pair_cores/pair_cores.py Sat Jun 02 19:29:40 2012 +0400 6.2 +++ b/pair_cores/pair_cores.py Sat Jun 02 19:33:42 2012 +0400 6.3 @@ -9,7 +9,7 @@ 6.4 6.5 from allpy import config 6.6 from allpy.argparse_validators import timeout 6.7 -from protein_pdb import Alignment, Block, Monomer, Sequence 6.8 +from protein_pdb import Alignment, Block, Sequence 6.9 from allpy import processors, markups 6.10 import allpy.base 6.11 from html import html_template
7.1 --- a/pair_cores/rasmol_output.py Sat Jun 02 19:29:40 2012 +0400 7.2 +++ b/pair_cores/rasmol_output.py Sat Jun 02 19:33:42 2012 +0400 7.3 @@ -4,7 +4,7 @@ 7.4 from gzip import GzipFile 7.5 from copy import copy 7.6 7.7 -from protein_pdb import Alignment, Block, Monomer, Sequence 7.8 +from protein_pdb import Alignment, Block, Sequence 7.9 from allpy import structure 7.10 7.11 def load(pdb_code):
8.1 --- a/test/test_base.py Sat Jun 02 19:29:40 2012 +0400 8.2 +++ b/test/test_base.py Sat Jun 02 19:33:42 2012 +0400 8.3 @@ -1,7 +1,7 @@ 8.4 import re 8.5 from StringIO import StringIO 8.6 8.7 -import allpy.base as b 8.8 +import allpy.base 8.9 import allpy.protein as p 8.10 from allpy import processors 8.11 8.12 @@ -16,21 +16,28 @@ 8.13 8.14 def test_new_monomers(): 8.15 """Test creation of monomer objects""" 8.16 + s = allpy.base.Sequence() 8.17 8.18 try: 8.19 - m = b.Monomer.from_code1('A') 8.20 + m = s.append_monomer(code1='A') 8.21 except Exception: 8.22 pass 8.23 else: 8.24 - assert False, "base.Monomers must not be constructible from code1" 8.25 + assert False, "base.Seqeuence must not be constructible from code1" 8.26 8.27 - m = b.Monomer.from_code3('ALA') 8.28 + m = p.Sequence().append_monomer(code3='ALA') 8.29 assert m.__class__.__name__ == "Alanine" 8.30 assert m.code1 == "A" 8.31 assert m.code3 == "ALA" 8.32 assert m.name == "Alanine" 8.33 8.34 - m = b.Monomer.from_name("alaNINE") 8.35 + m = s.append_monomer(code3='ALA') 8.36 + assert m.__class__.__name__ == "Alanine" 8.37 + assert m.code1 == "A" 8.38 + assert m.code3 == "ALA" 8.39 + assert m.name == "Alanine" 8.40 + 8.41 + m = s.append_monomer(name="alaNINE") 8.42 assert m.__class__.__name__ == "Alanine" 8.43 assert m.code1 == "A" 8.44 assert m.code3 == "ALA" 8.45 @@ -38,7 +45,7 @@ 8.46 8.47 # This actually tests for proper overriding when there are many monomers 8.48 # with the same code3. The unmodified one takes precedence. 8.49 - m = b.Monomer.from_code3('SEC') 8.50 + m = s.append_monomer(code3='SEC') 8.51 assert m.name == "Selenocysteine" 8.52 8.53 def assert_alignment(alignment, *body):
9.1 --- a/test/test_pickle.py Sat Jun 02 19:29:40 2012 +0400 9.2 +++ b/test/test_pickle.py Sat Jun 02 19:33:42 2012 +0400 9.3 @@ -18,5 +18,6 @@ 9.4 s = pickle.load(file) 9.5 assert s.name == 'sequence' 9.6 assert str(s) == 'SEQVENCE' 9.7 + assert s[0].sequence is s 9.8 9.9 # vim: set et ts=4 sts=4 sw=4: