allpy
changeset 1014:4349a1f31dd2
Added generic alignment and sequence markup classes for atomic types (int, str, char, float, bool), and tests for them (see #94, #55)
author | Daniil Alexeyevsky <dendik@kodomo.fbb.msu.ru> |
---|---|
date | Tue, 06 Mar 2012 20:29:34 +0400 |
parents | 5dc4f805b000 |
children | 95a74440afaa |
files | allpy/markups.py test/test_markups.py |
diffstat | 2 files changed, 136 insertions(+), 11 deletions(-) [+] |
line diff
1.1 --- a/allpy/markups.py Tue Mar 06 16:08:49 2012 +0400 1.2 +++ b/allpy/markups.py Tue Mar 06 20:29:34 2012 +0400 1.3 @@ -22,10 +22,13 @@ 1.4 global by_name 1.5 by_name = {} 1.6 for markup_class in globals().values(): 1.7 - if hasattr(markup_class, 'name') and hasattr(markup_class, 'kind'): 1.8 - fullname = markup_class.kind, markup_class.name 1.9 - assert fullname not in by_name, "Samenamed markup already exists!" 1.10 - by_name[fullname] = markup_class 1.11 + if not hasattr(markup_class, 'name') or not markup_class.name: 1.12 + continue 1.13 + if not hasattr(markup_class, 'kind') or not markup_class.kind: 1.14 + continue 1.15 + fullname = markup_class.kind, markup_class.name 1.16 + assert fullname not in by_name, "Markup %s already exists!" % (fullname,) 1.17 + by_name[fullname] = markup_class 1.18 1.19 class MarkupIOMixin(base.Markup): 1.20 """Standard helper mixin for creating saveable markups.""" 1.21 @@ -53,16 +56,16 @@ 1.22 def from_record(cls, container, record, name=None): 1.23 """Read markup from semi-parsed record from 'markup' file.""" 1.24 assert record['io_class'] == cls.io_class 1.25 - separator = record.get('separator', cls.separator) 1.26 - quotes = record.get('quotes', cls.quotes) 1.27 - values = record['markup'].strip(quotes) 1.28 - if separator: 1.29 - values = values.split(separator) 1.30 result = container.add_markup(name, markup_class=cls) 1.31 + result.separator = record.get('separator', cls.separator) 1.32 + result.quotes = record.get('quotes', cls.quotes) 1.33 + values = record['markup'].strip(result.quotes) 1.34 + if result.separator: 1.35 + values = values.split(result.separator) 1.36 assert len(values) == len(result.sorted_keys()) 1.37 for key, value in zip(result.sorted_keys(), values): 1.38 if value: 1.39 - result[key] = cls.parse_item(key, value) 1.40 + result[key] = result.parse_item(key, value) 1.41 return result 1.42 1.43 def to_record(self): 1.44 @@ -90,6 +93,31 @@ 1.45 def parse_item(key, value): 1.46 return int(value) 1.47 
1.48 +class FloatMarkupMixin(MarkupIOMixin): 1.49 + """Markup that has float values.""" 1.50 + 1.51 + io_class = 'FloatMarkup' 1.52 + 1.53 + @staticmethod 1.54 + def parse_item(key, value): 1.55 + return float(value) 1.56 + 1.57 +class BoolMarkupMixin(MarkupIOMixin): 1.58 + """Markup that has boolean values.""" 1.59 + 1.60 + io_class = 'BoolMarkup' 1.61 + 1.62 + @staticmethod 1.63 + def parse_item(key, value): 1.64 + return value == '+' 1.65 + 1.66 + @staticmethod 1.67 + def repr_item(key, value): 1.68 + if value: 1.69 + return '+' 1.70 + else: 1.71 + return '-' 1.72 + 1.73 class CharMarkupMixin(MarkupIOMixin): 1.74 """Markup that has one-letter values.""" 1.75 1.76 @@ -105,10 +133,36 @@ 1.77 assert len(str(value)) == 1 1.78 return str(value) 1.79 1.80 +class StrMarkupMixin(MarkupIOMixin): 1.81 + """Markup that is capable of storing ANY strings.""" 1.82 + 1.83 + io_class = 'StrMarkup' 1.84 + 1.85 + def parse_item(self, key, value): 1.86 + return value.replace("%2C", self.separator).replace("%25", "%") 1.87 + 1.88 + def repr_item(self, key, value): 1.89 + return str(value).replace("%", "%25").replace(self.separator, "%2C") 1.90 + 1.91 +class SequenceStrMarkup(base.SequenceMarkup, StrMarkupMixin): 1.92 + """Generic class for string-based markups.""" 1.93 + pass 1.94 + 1.95 class SequenceCharMarkup(base.SequenceMarkup, CharMarkupMixin): 1.96 """Generic class for one-letter markups.""" 1.97 + pass 1.98 1.99 - name = 'char' 1.100 +class SequenceIntMarkup(base.SequenceMarkup, IntMarkupMixin): 1.101 + """Generic class for integer number markups.""" 1.102 + pass 1.103 + 1.104 +class SequenceFloatMarkup(base.SequenceMarkup, FloatMarkupMixin): 1.105 + """Generic class for floating-point number markups.""" 1.106 + pass 1.107 + 1.108 +class SequenceBoolMarkup(base.SequenceMarkup, BoolMarkupMixin): 1.109 + """Generic class for boolean markups.""" 1.110 + pass 1.111 1.112 class SequenceNumberMarkup(base.SequenceMarkup): 1.113 1.114 @@ -126,6 +180,26 @@ 1.115 for index, monomer 
in enumerate(self.sequence): 1.116 monomer.index = index 1.117 1.118 +class AlignmentStrMarkup(base.AlignmentMarkup, StrMarkupMixin): 1.119 + """Generic class for string-based markups.""" 1.120 + pass 1.121 + 1.122 +class AlignmentCharMarkup(base.AlignmentMarkup, CharMarkupMixin): 1.123 + """Generic class for one-letter markups.""" 1.124 + pass 1.125 + 1.126 +class AlignmentIntMarkup(base.AlignmentMarkup, IntMarkupMixin): 1.127 + """Generic class for integer number markups.""" 1.128 + pass 1.129 + 1.130 +class AlignmentFloatMarkup(base.AlignmentMarkup, FloatMarkupMixin): 1.131 + """Generic class for floating-point number markups.""" 1.132 + pass 1.133 + 1.134 +class AlignmentBoolMarkup(base.AlignmentMarkup, BoolMarkupMixin): 1.135 + """Generic class for boolean markups.""" 1.136 + pass 1.137 + 1.138 class AlignmentNumberMarkup(base.AlignmentMarkup): 1.139 1.140 name = 'number'
2.1 --- a/test/test_markups.py Tue Mar 06 16:08:49 2012 +0400 2.2 +++ b/test/test_markups.py Tue Mar 06 20:29:34 2012 +0400 2.3 @@ -117,6 +117,57 @@ 2.4 assert s[0].ss == 'H' 2.5 assert s[5].ss == 'S' 2.6 2.7 +def test_generic(): 2.8 + aln = (protein.Alignment(). 2.9 + append_row_from_string('aseq-vence', name='a'). 2.10 + append_row_from_string('ase-qret--', name='b') 2.11 + ) 2.12 + 2.13 + s1, s2 = aln.sequences 2.14 + 2.15 + try: 2.16 + s1.add_markup(markup_class=markups.SequenceStrMarkup) 2.17 + except Exception: 2.18 + pass 2.19 + else: 2.20 + raise AssertionError("Generic markup must not be creatable without name") 2.21 + 2.22 + s1.add_markup("a", markup_class=markups.SequenceStrMarkup) 2.23 + s1[6].a = "Q" 2.24 + 2.25 + s1.add_markup("b", markup_class=markups.SequenceStrMarkup) 2.26 + s1[5].b = "hello, world %)" 2.27 + 2.28 + s2.add_markup("c", markup_class=markups.SequenceIntMarkup) 2.29 + s2[4].c = 42 2.30 + 2.31 + s1.add_markup("d", markup_class=markups.SequenceFloatMarkup) 2.32 + s1[3].d = 9000.001 2.33 + 2.34 + s2.add_markup("e", markup_class=markups.SequenceBoolMarkup) 2.35 + s2[2].e = "a" is "a" 2.36 + s2[6].e = "a" is "b" 2.37 + 2.38 + f = aln.add_markup("f", markup_class=markups.AlignmentBoolMarkup) 2.39 + f[aln.columns[3]] = 5 2.40 + 2.41 + file = StringIO() 2.42 + aln.to_file(file, format='markup') 2.43 + 2.44 + print file.getvalue() 2.45 + 2.46 + file.seek(0) 2.47 + out = protein.Alignment().append_file(file, format='markup') 2.48 + 2.49 + s1, s2 = out.sequences 2.50 + assert s1[6].a == "Q" 2.51 + assert s1[5].b == "hello, world %)" 2.52 + assert s2[4].c == 42 2.53 + assert abs(s1[3].d - 9000.001) < 0.001 2.54 + assert s2[2].e is True 2.55 + assert s2[6].e is False 2.56 + assert out.markups["f"][out.columns[3]] is True 2.57 + 2.58 def test_ss(): 2.59 seq = protein_pdb.Sequence.from_string('QLSSDAQTANAKADQASNDANAARS', '1jcc_B') 2.60 seq.auto_pdb()