Документ взят из кэша поисковой машины. Адрес оригинального документа : http://kodomo.fbb.msu.ru/hg/allpy/rev/2785ca30cea5
Дата изменения: Unknown
Дата индексирования: Mon Oct 1 23:31:13 2012
Кодировка: UTF-8
allpy: 2785ca30cea5

allpy

changeset 360:2785ca30cea5

Automated merge with ssh://kodomo/allpy
author Daniil Alexeyevsky <me.dendik@gmail.com>
date Tue, 25 Jan 2011 16:03:00 +0300
parents 3048f18154ef 1c06c34c4df2
children ef122cabf4e8
files __init__.py blocks3d/wt/config.C geometrical_core/geometrical_core.py lib/__init__.py lib/allpy_data/AAdict.py lib/allpy_data/__init__.py lib/allpy_data/amino_acids.py lib/allpy_data/blossum62.py lib/allpy_pdb.py lib/block.py lib/config.py lib/fasta.py lib/graph.py lib/monomer.py lib/project.py lib/sequence.py sec_str/sec_str.py
diffstat 78 files changed, 4642 insertions(+), 3324 deletions(-) [+]
line diff
     1.1 --- a/.hgignore	Mon Jan 24 21:40:10 2011 +0300
     1.2 +++ b/.hgignore	Tue Jan 25 16:03:00 2011 +0300
     1.3 @@ -1,4 +1,27 @@
     1.4  syntax: glob
     1.5 +
     1.6 +# Compiler-generated junk
     1.7  *.pyc
     1.8  build
     1.9 +
    1.10 +# Junk from text-editors
    1.11  *.geany
    1.12 +*.swp
    1.13 +
    1.14 +# Temporary files from sphinx
    1.15 +docs/build
    1.16 +
    1.17 +# Temporary files from debianization
    1.18 +debian/python-allpy/
    1.19 +debian/geometrical-core/
    1.20 +debian/*.substvars
    1.21 +debian/*.debhelper*
    1.22 +debian/*stamp*
    1.23 +debian/files
    1.24 +debian/control
    1.25 +debian/*.log
    1.26 +debian/*/*.log
    1.27 +debian/tmp/
    1.28 +debian/pycompat
    1.29 +*.deb
    1.30 +*.cdbs-config_list
     2.1 --- a/.hgtags	Mon Jan 24 21:40:10 2011 +0300
     2.2 +++ b/.hgtags	Tue Jan 25 16:03:00 2011 +0300
     2.3 @@ -1,1 +1,2 @@
     2.4  c51ef42eb5e5c2c98dac3c99271777905fb4da76 first run
     2.5 +c1b67c10f8f3db62cce4b70c292a5882e91bf5b6 0.0
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/README.devel	Tue Jan 25 16:03:00 2011 +0300
     3.3 @@ -0,0 +1,13 @@
     3.4 +To use the library in-place, without installing, you have to add its
     3.5 +directory to PYTHONPATH.
     3.6 +
     3.7 +e.g., type this in your shell:
     3.8 +
     3.9 +	export PYTHONPATH=`pwd`
    3.10 +
    3.11 +alternatively, you can type:
    3.12 +
    3.13 +  . ./SETPATH
    3.14 +
    3.15 +which adds the right path to PYTHONPATH and also adds 'utils' directory
    3.16 +to PATH
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/SETPATH	Tue Jan 25 16:03:00 2011 +0300
     4.3 @@ -0,0 +1,22 @@
     4.4 +#!/bin/bash
     4.5 +
     4.6 +say () { echo "$@" >&2; }
     4.7 +
     4.8 +main () {
     4.9 +	local newPYTHONPATH
    4.10 +
    4.11 +	[[ "$0" != *sh ]] && say "Usage: . $0" && return 1
    4.12 +
    4.13 +	newPYTHONPATH="$(readlink -f "$(dirname "$BASH_SOURCE")")"
    4.14 +
    4.15 +	[[ "$PYTHONPATH" = "$newPYTHONPATH" ]] && say "Already done" && return
    4.16 +	[[ -n "$PYTHONPATH" ]] && say "PYTHONPATH was not empty" && return
    4.17 +
    4.18 +	export PYTHONPATH="$newPYTHONPATH"
    4.19 +	export PATH="$newPYTHONPATH/utils:$PATH"
    4.20 +
    4.21 +	echo "PYTHONPATH=$PYTHONPATH"
    4.22 +	echo "PATH=$PATH"
    4.23 +}
    4.24 +
    4.25 +main
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/allpy/base.py	Tue Jan 25 16:03:00 2011 +0300
     5.3 @@ -0,0 +1,340 @@
     5.4 +import sys
     5.5 +import re
     5.6 +
     5.7 +import util
     5.8 +import fasta
     5.9 +
    5.10 +default_gaps = set((".", "-", "~"))
    5.11 +"""Set of characters to recognize as gaps when parsing alignment."""
    5.12 +
    5.13 +class Monomer(object):
    5.14 +    """Monomer object."""
    5.15 +
    5.16 +    type = None
    5.17 +    """Either of 'dna', 'rna', 'protein'."""
    5.18 +
    5.19 +    by_code1 = {}
    5.20 +    """A mapping from 1-letter code to Monomer subclass."""
    5.21 +
    5.22 +    by_code3 = {}
    5.23 +    """A mapping from 3-letter code to Monomer subclass."""
    5.24 +
    5.25 +    by_name = {}
    5.26 +    """A mapping from full monomer name to Monomer subclass."""
    5.27 +
    5.28 +    @classmethod
    5.29 +    def _subclass(cls, name='', code1='', code3='', is_modified=False):
    5.30 +        """Create new subclass of Monomer for given monomer type."""
    5.31 +        class TheMonomer(cls):
    5.32 +            pass
    5.33 +        name = name.strip().capitalize()
    5.34 +        code1 = code1.upper()
    5.35 +        code3 = code3.upper()
    5.36 +        TheMonomer.__name__ = re.sub(r"[^\w]", "_", name)
    5.37 +        TheMonomer.name = name
    5.38 +        TheMonomer.code1 = code1
    5.39 +        TheMonomer.code3 = code3
    5.40 +        TheMonomer.is_modified = is_modified
    5.41 +        if not is_modified:
    5.42 +            cls.by_code1[code1] = TheMonomer
    5.43 +        cls.by_code3[code3] = TheMonomer
    5.44 +        cls.by_name[name] = TheMonomer
    5.45 +        # We duplicate distinguished long names into Monomer itself, so that we
    5.46 +        # can use Monomer.from_code3 to create the relevant type of monomer.
    5.47 +        Monomer.by_code3[code3] = TheMonomer
    5.48 +        Monomer.by_name[name] = TheMonomer
    5.49 +
    5.50 +    @classmethod
    5.51 +    def _initialize(cls, codes=None):
    5.52 +        """Create all relevant subclasses of Monomer."""
    5.53 +        # NB. The table uses letters d, r, p for types,
    5.54 +        # while we use full words; hence, we compare by first letter
    5.55 +        for type, code1, is_modified, code3, name in codes:
    5.56 +            if type[0] == cls.type[0]:
    5.57 +                cls._subclass(name, code1, code3, is_modified)
    5.58 +
    5.59 +    @classmethod
    5.60 +    def from_code1(cls, code1):
    5.61 +        """Create new monomer from 1-letter code."""
    5.62 +        return cls.by_code1[code1.upper()]()
    5.63 +
    5.64 +    @classmethod
    5.65 +    def from_code3(cls, code3):
    5.66 +        """Create new monomer from 3-letter code."""
    5.67 +        return cls.by_code3[code3.upper()]()
    5.68 +
    5.69 +    @classmethod
    5.70 +    def from_name(cls, name):
    5.71 +        """Create new monomer from full name."""
    5.72 +        return cls.by_name[name.strip().capitalize()]()
    5.73 +
    5.74 +    def __repr__(self):
    5.75 +        return '<Monomer %s>' % self.code3
    5.76 +
    5.77 +    def __str__(self):
    5.78 +        """Returns one-letter code"""
    5.79 +        return self.code1
    5.80 +
    5.81 +    def __eq__(self, other):
    5.82 +        """Monomers within same monomer type are compared by code1."""
    5.83 +        assert self.type == other.type
    5.84 +        return self.code1 == other.code1
    5.85 +
    5.86 +class Sequence(list):
    5.87 +    """Sequence of Monomers.
    5.88 +
    5.89 +    This behaves like list of monomer objects. In addition to standard list
    5.90 +    behaviour, Sequence has the following attributes:
    5.91 +
    5.92 +    *   name -- str with the name of the sequence
    5.93 +    *   description -- str with description of the sequence
    5.94 +    *   source -- str denoting source of the sequence
    5.95 +
    5.96 +    Any of them may be empty (i.e. hold empty string)
    5.97 +
    5.98 +    Class attributes:
    5.99 +
   5.100 +    *   monomer_type -- type of monomers in sequence, must be redefined when
   5.101 +        subclassing
   5.102 +    """
   5.103 +
   5.104 +    monomer_type = Monomer
   5.105 +
   5.106 +    name = ''
   5.107 +    description = ''
   5.108 +    source = ''
   5.109 +
   5.110 +    @classmethod
   5.111 +    def from_monomers(cls, monomers=[], name=None, description=None, source=None):
   5.112 +        """Create sequence from a list of monomer objects."""
   5.113 +        result = cls()
   5.114 +        if name:
   5.115 +            result.name = name
   5.116 +        if description:
   5.117 +            result.description = description
   5.118 +        if source:
   5.119 +            result.source = source
   5.120 +        return result
   5.121 +
   5.122 +    @classmethod
   5.123 +    def from_string(cls, string, name='', description='', source=''):
   5.124 +        """Create sequences from string of one-letter codes."""
   5.125 +        monomer = cls.monomer_type.from_code1
   5.126 +        monomers = [monomer(letter) for letter in string]
   5.127 +        return cls.from_monomers(monomers, name, description, source)
   5.128 +
   5.129 +    def __repr__(self):
   5.130 +        return '<Sequence %s>' % str(self)
   5.131 +
   5.132 +    def __str__(self):
   5.133 +        """Returns sequence of one-letter codes."""
   5.134 +        return ''.join(monomer.code1 for monomer in self)
   5.135 +
   5.136 +    def __hash__(self):
   5.137 +        """Hash sequence by identity."""
   5.138 +        return id(self)
   5.139 +
   5.140 +class Alignment(object):
   5.141 +    """Alignment. It is a list of Columns."""
   5.142 +
   5.143 +    sequence_type = Sequence
   5.144 +    """Type of sequences in alignment. SHOULD be redefined when subclassing."""
   5.145 +
   5.146 +    sequences = None
   5.147 +    """Ordered list of sequences in alignment. Read, but DO NOT FIDDLE!"""
   5.148 +
   5.149 +    def __init__(self):
   5.150 +        """Initialize empty alignment."""
   5.151 +        self.sequences = []
   5.152 +        self.columns = []
   5.153 +
   5.154 +    # Alignment modification methods
   5.155 +    # ==============================
   5.156 +
   5.157 +    def append_sequence(self, sequence):
   5.158 +        """Add sequence to alignment.
   5.159 +
   5.160 +        If sequence is too short, pad it with gaps on the right.
   5.161 +        """
   5.162 +        self.sequences.append(sequence)
   5.163 +        for i, monomer in enumerate(sequence):
   5.164 +            self.column_at(i)[sequence] = monomer
   5.165 +
   5.166 +    def append_row(self, string, name='', description='', source='',
   5.167 +            gaps=default_gaps):
   5.168 +        """Add row from a string of one-letter codes and gaps."""
   5.169 +        Sequence = self.sequence_type
   5.170 +        not_gap = lambda (i, char): char not in gaps
   5.171 +        without_gaps = util.remove_each(string, gaps)
   5.172 +        sequence = Sequence.from_string(without_gaps, name, description, source)
   5.173 +        # The following line has some simple magic:
   5.174 +        # 1. attach natural numbers to monomers
   5.175 +        # 2. delete gaps
   5.176 +        # 3. attach numbers again
   5.177 +        # This way we have a pair of numbers attached to monomer:
   5.178 +        # - its position in alignment (the first attached number, j)
   5.179 +        # - its position in sequence (the second attached number, i)
   5.180 +        for i, (j, char) in enumerate(filter(not_gap, enumerate(string))):
   5.181 +            self.column_at(j)[sequence] = sequence[i]
   5.182 +        self.sequences.append(sequence)
   5.183 +
   5.184 +    def column_at(self, n):
   5.185 +        """Return column by index. Create required new columns if required.
   5.186 +        
   5.187 +        Do NOT use this method, unless you are sure it is what you want.
   5.188 +        """
   5.189 +        for i in range(len(self.columns), n + 1):
   5.190 +            self.columns.append(Column())
   5.191 +        return self.columns[n]
   5.192 +
   5.193 +    # Alignment IO methods
   5.194 +    # ====================
   5.195 +
   5.196 +    @classmethod
   5.197 +    def from_file(cls, file, format='fasta', gaps=default_gaps):
   5.198 +        """Create new alignment from file."""
   5.199 +        self = cls()
   5.200 +        assert format == 'fasta'
   5.201 +        for (name, description, body) in fasta.parse_file(file):
   5.202 +            self.append_row(body, name, description, file.name, gaps)
   5.203 +        return self
   5.204 +
   5.205 +    def to_fasta(self, file):
   5.206 +        """Write alignment in FASTA file as sequences with gaps."""
   5.207 +        def char(monomer):
   5.208 +            if monomer:
   5.209 +                return monomer.code1
   5.210 +            return "-"
   5.211 +        for row in self.rows_as_lists():
   5.212 +            seq = row.sequence
   5.213 +            line = "".join(map(char, row))
   5.214 +            fasta.save_file(file, line, seq.name, seq.description)
   5.215 +
   5.216 +    # Data access methods for alignment
   5.217 +    # =================================
   5.218 +
   5.219 +    def rows(self):
   5.220 +        """Return list of rows (temporary objects) in alignment.
   5.221 +
   5.222 +        Each row is a dictionary of { column : monomer }.
   5.223 +        
   5.224 +        For gap positions there is no key for the column in row.
   5.225 +
   5.226 +        Each row has attribute `sequence` pointing to the sequence the row is
   5.227 +        describing.
   5.228 +
   5.229 +        Modifications of row have no effect on the alignment.
   5.230 +        """
   5.231 +        # For now, the function returns a list rather than iterator.
   5.232 +        # It is yet to see, whether memory performance here becomes critical,
   5.233 +        # or is random access useful.
   5.234 +        rows = []
   5.235 +        for sequence in self.sequences:
   5.236 +            row = util.UserDict()
   5.237 +            row.sequence = sequence
   5.238 +            for column in self.columns:
   5.239 +                if sequence in column:
   5.240 +                    row[column] = column[sequence]
   5.241 +            rows.append(row)
   5.242 +        return rows
   5.243 +
   5.244 +    def rows_as_lists(self):
   5.245 +        """Return list of rows (temporary objects) in alignment.
   5.246 +
   5.247 +        Each row here is a list of either monomer or None (for gaps).
   5.248 +
   5.249 +        Each row has attribute `sequence` pointing to the sequence of row.
   5.250 +
   5.251 +        Modifications of row have no effect on the alignment.
   5.252 +        """
   5.253 +        rows = []
   5.254 +        for sequence in self.sequences:
   5.255 +            row = util.UserList()
   5.256 +            row.sequence = sequence
   5.257 +            for column in self.columns:
   5.258 +                row.append(column.get(sequence))
   5.259 +            rows.append(row)
   5.260 +        return rows
   5.261 +
   5.262 +    def columns_as_lists(self):
   5.263 +        """Return list of columns (temporary objects) in alignment.
   5.264 +
   5.265 +        Each column here is a list of either monomer or None (for gaps).
   5.266 +
   5.267 +        Items of column are sorted in the same way as alignment.sequences.
   5.268 +
   5.269 +        Modifications of column have no effect on the alignment.
   5.270 +        """
   5.271 +        columns = []
   5.272 +        for column in self.columns:
   5.273 +            col = []
   5.274 +            for sequence in self.sequences:
   5.275 +                col.append(column.get(sequence))
   5.276 +            columns.append(col)
   5.277 +        return columns
   5.278 +
   5.279 +class Column(dict):
   5.280 +    """Column of alignment.
   5.281 +
   5.282 +    Column is a dict of { sequence : monomer }.
   5.283 +
   5.284 +    For sequences that have a gap in this column, the sequence key is not
   5.285 +    present in the column.
   5.286 +    """
   5.287 +
   5.288 +    def __hash__(self):
   5.289 +        """Return hash by identity."""
   5.290 +        return id(self)
   5.291 +
   5.292 +class Block(Alignment):
   5.293 +    """Block of alignment.
   5.294 +
   5.295 +    Block is intersection of a set of columns & a set of rows. Most of blocks
   5.296 +    look like rectangular part of alignment if you shuffle alignment rows the
   5.297 +    right way.
   5.298 +    """
   5.299 +
   5.300 +    alignment = None
   5.301 +    """Alignment the block belongs to."""
   5.302 +
   5.303 +    sequences = ()
   5.304 +    """List of sequences in block."""
   5.305 +
   5.306 +    columns = ()
   5.307 +    """List of columns in block."""
   5.308 +
   5.309 +    @classmethod
   5.310 +    def from_alignment(cls, alignment, sequences=None, columns=None):
   5.311 +        """Build new block from alignment.
   5.312 +
   5.313 +        If sequences are not given, the block uses all sequences in alignment.
   5.314 +
   5.315 +        If columns are not given, the block uses all columns in alignment.
   5.316 +
   5.317 +        In both cases we use exactly the list used in alignment, thus, if new
   5.318 +        sequences or columns are added to alignment, the block tracks this too.
   5.319 +        """
   5.320 +        if sequences is None:
   5.321 +            sequences = alignment.sequences
   5.322 +        if columns is None:
   5.323 +            columns = alignment.columns
   5.324 +        block = cls()
   5.325 +        block.alignment = alignment
   5.326 +        block.sequences = sequences
   5.327 +        block.columns = columns
   5.328 +        return block
   5.329 +
   5.330 +    def flush_left(self):
   5.331 +        """Move all monomers to the left, gaps to the right within block."""
   5.332 +        padding = [None] * len(self.columns)
   5.333 +        for row in self.rows_as_lists():
   5.334 +            sequence = row.sequence
   5.335 +            row = filter(None, row) + padding
   5.336 +            for monomer, column in zip(row, self.columns):
   5.337 +                if monomer:
   5.338 +                    column[sequence] = monomer
   5.339 +                elif sequence in column:
   5.340 +                    del column[sequence]
   5.341 +                
   5.342 +
   5.343 +# vim: set ts=4 sts=4 sw=4 et:
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/allpy/config.py	Tue Jan 25 16:03:00 2011 +0300
     6.3 @@ -0,0 +1,18 @@
     6.4 +
     6.5 +delta = 2.0 # for geometrical core building
     6.6 +minsize = 20 # min size of returning cores
     6.7 +maxabsent = 0.15 # deprecated?
     6.8 +
     6.9 +# PDB download URL (%s is replaced with the PDB code)
    6.10 +pdb_url = 'http://www.pdb.org/pdb/files/%s.pdb'
    6.11 +pdb_dir = '/tmp/%s.pdb'
    6.12 +timeout = 10 # time in sec. for the Bron-Kerbosch algorithm
    6.13 +
    6.14 +
    6.15 +# min fraction of new atoms in a new alternative core
    6.16 +ac_new_atoms = 0.5
    6.17 +
    6.18 +# max number of cores (including main core)
    6.19 +ac_count = 5
    6.20 +
    6.21 +
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/allpy/data/__init__.py	Tue Jan 25 16:03:00 2011 +0300
     7.3 @@ -0,0 +1,3 @@
     7.4 +"""
     7.5 +Module that contains various data relevant to biological sequences.
     7.6 +"""
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/allpy/data/blossum62.py	Tue Jan 25 16:03:00 2011 +0300
     8.3 @@ -0,0 +1,28 @@
     8.4 +matrix = {
     8.5 +"A": {"A":  4, "R": -1, "N": -2, "D": -2, "C":  0, "Q": -1, "E": -1, "G":  0, "H": -2, "I": -1, "L": -1, "K": -1, "M": -1, "F": -2, "P": -1, "S":  1, "T":  0, "W": -3, "Y": -2, "V":  0, "B": -2, "Z": -1, "X":  0, "*": -4},
     8.6 +"R": {"A": -1, "R":  5, "N":  0, "D": -2, "C": -3, "Q":  1, "E":  0, "G": -2, "H":  0, "I": -3, "L": -2, "K":  2, "M": -1, "F": -3, "P": -2, "S": -1, "T": -1, "W": -3, "Y": -2, "V": -3, "B": -1, "Z":  0, "X": -1, "*": -4},
     8.7 +"N": {"A": -2, "R":  0, "N":  6, "D":  1, "C": -3, "Q":  0, "E":  0, "G":  0, "H":  1, "I": -3, "L": -3, "K":  0, "M": -2, "F": -3, "P": -2, "S":  1, "T":  0, "W": -4, "Y": -2, "V": -3, "B":  3, "Z":  0, "X": -1, "*": -4},
     8.8 +"D": {"A": -2, "R": -2, "N":  1, "D":  6, "C": -3, "Q":  0, "E":  2, "G": -1, "H": -1, "I": -3, "L": -4, "K": -1, "M": -3, "F": -3, "P": -1, "S":  0, "T": -1, "W": -4, "Y": -3, "V": -3, "B":  4, "Z":  1, "X": -1, "*": -4},
     8.9 +"C": {"A":  0, "R": -3, "N": -3, "D": -3, "C":  9, "Q": -3, "E": -4, "G": -3, "H": -3, "I": -1, "L": -1, "K": -3, "M": -1, "F": -2, "P": -3, "S": -1, "T": -1, "W": -2, "Y": -2, "V": -1, "B": -3, "Z": -3, "X": -2, "*": -4},
    8.10 +"Q": {"A": -1, "R":  1, "N":  0, "D":  0, "C": -3, "Q":  5, "E":  2, "G": -2, "H":  0, "I": -3, "L": -2, "K":  1, "M":  0, "F": -3, "P": -1, "S":  0, "T": -1, "W": -2, "Y": -1, "V": -2, "B":  0, "Z":  3, "X": -1, "*": -4},
    8.11 +"E": {"A": -1, "R":  0, "N":  0, "D":  2, "C": -4, "Q":  2, "E":  5, "G": -2, "H":  0, "I": -3, "L": -3, "K":  1, "M": -2, "F": -3, "P": -1, "S":  0, "T": -1, "W": -3, "Y": -2, "V": -2, "B":  1, "Z":  4, "X": -1, "*": -4},
    8.12 +"G": {"A":  0, "R": -2, "N":  0, "D": -1, "C": -3, "Q": -2, "E": -2, "G":  6, "H": -2, "I": -4, "L": -4, "K": -2, "M": -3, "F": -3, "P": -2, "S":  0, "T": -2, "W": -2, "Y": -3, "V": -3, "B": -1, "Z": -2, "X": -1, "*": -4},
    8.13 +"H": {"A": -2, "R":  0, "N":  1, "D": -1, "C": -3, "Q":  0, "E":  0, "G": -2, "H":  8, "I": -3, "L": -3, "K": -1, "M": -2, "F": -1, "P": -2, "S": -1, "T": -2, "W": -2, "Y":  2, "V": -3, "B":  0, "Z":  0, "X": -1, "*": -4},
    8.14 +"I": {"A": -1, "R": -3, "N": -3, "D": -3, "C": -1, "Q": -3, "E": -3, "G": -4, "H": -3, "I":  4, "L":  2, "K": -3, "M":  1, "F":  0, "P": -3, "S": -2, "T": -1, "W": -3, "Y": -1, "V":  3, "B": -3, "Z": -3, "X": -1, "*": -4},
    8.15 +"L": {"A": -1, "R": -2, "N": -3, "D": -4, "C": -1, "Q": -2, "E": -3, "G": -4, "H": -3, "I":  2, "L":  4, "K": -2, "M":  2, "F":  0, "P": -3, "S": -2, "T": -1, "W": -2, "Y": -1, "V":  1, "B": -4, "Z": -3, "X": -1, "*": -4},
    8.16 +"K": {"A": -1, "R":  2, "N":  0, "D": -1, "C": -3, "Q":  1, "E":  1, "G": -2, "H": -1, "I": -3, "L": -2, "K":  5, "M": -1, "F": -3, "P": -1, "S":  0, "T": -1, "W": -3, "Y": -2, "V": -2, "B":  0, "Z":  1, "X": -1, "*": -4},
    8.17 +"M": {"A": -1, "R": -1, "N": -2, "D": -3, "C": -1, "Q":  0, "E": -2, "G": -3, "H": -2, "I":  1, "L":  2, "K": -1, "M":  5, "F":  0, "P": -2, "S": -1, "T": -1, "W": -1, "Y": -1, "V":  1, "B": -3, "Z": -1, "X": -1, "*": -4},
    8.18 +"F": {"A": -2, "R": -3, "N": -3, "D": -3, "C": -2, "Q": -3, "E": -3, "G": -3, "H": -1, "I":  0, "L":  0, "K": -3, "M":  0, "F":  6, "P": -4, "S": -2, "T": -2, "W":  1, "Y":  3, "V": -1, "B": -3, "Z": -3, "X": -1, "*": -4},
    8.19 +"P": {"A": -1, "R": -2, "N": -2, "D": -1, "C": -3, "Q": -1, "E": -1, "G": -2, "H": -2, "I": -3, "L": -3, "K": -1, "M": -2, "F": -4, "P":  7, "S": -1, "T": -1, "W": -4, "Y": -3, "V": -2, "B": -2, "Z": -1, "X": -2, "*": -4},
    8.20 +"S": {"A":  1, "R": -1, "N":  1, "D":  0, "C": -1, "Q":  0, "E":  0, "G":  0, "H": -1, "I": -2, "L": -2, "K":  0, "M": -1, "F": -2, "P": -1, "S":  4, "T":  1, "W": -3, "Y": -2, "V": -2, "B":  0, "Z":  0, "X":  0, "*": -4},
    8.21 +"T": {"A":  0, "R": -1, "N":  0, "D": -1, "C": -1, "Q": -1, "E": -1, "G": -2, "H": -2, "I": -1, "L": -1, "K": -1, "M": -1, "F": -2, "P": -1, "S":  1, "T":  5, "W": -2, "Y": -2, "V":  0, "B": -1, "Z": -1, "X":  0, "*": -4},
    8.22 +"W": {"A": -3, "R": -3, "N": -4, "D": -4, "C": -2, "Q": -2, "E": -3, "G": -2, "H": -2, "I": -3, "L": -2, "K": -3, "M": -1, "F":  1, "P": -4, "S": -3, "T": -2, "W": 11, "Y":  2, "V": -3, "B": -4, "Z": -3, "X": -2, "*": -4},
    8.23 +"Y": {"A": -2, "R": -2, "N": -2, "D": -3, "C": -2, "Q": -1, "E": -2, "G": -3, "H":  2, "I": -1, "L": -1, "K": -2, "M": -1, "F":  3, "P": -3, "S": -2, "T": -2, "W":  2, "Y":  7, "V": -1, "B": -3, "Z": -2, "X": -1, "*": -4},
    8.24 +"V": {"A":  0, "R": -3, "N": -3, "D": -3, "C": -1, "Q": -2, "E": -2, "G": -3, "H": -3, "I":  3, "L":  1, "K": -2, "M":  1, "F": -1, "P": -2, "S": -2, "T":  0, "W": -3, "Y": -1, "V":  4, "B": -3, "Z": -2, "X": -1, "*": -4},
    8.25 +"B": {"A": -2, "R": -1, "N":  3, "D":  4, "C": -3, "Q":  0, "E":  1, "G": -1, "H":  0, "I": -3, "L": -4, "K":  0, "M": -3, "F": -3, "P": -2, "S":  0, "T": -1, "W": -4, "Y": -3, "V": -3, "B":  4, "Z":  0, "X": -1, "*": -4},
    8.26 +"Z": {"A": -1, "R":  0, "N":  0, "D":  1, "C": -3, "Q":  3, "E":  4, "G": -2, "H":  0, "I": -3, "L": -3, "K":  1, "M": -1, "F": -3, "P": -1, "S":  0, "T": -1, "W": -3, "Y": -2, "V": -2, "B":  0, "Z":  4, "X": -1, "*": -4},
    8.27 +"X": {"A":  0, "R": -1, "N": -1, "D": -1, "C": -2, "Q": -1, "E": -1, "G": -1, "H": -1, "I": -1, "L": -1, "K": -1, "M": -1, "F": -1, "P": -2, "S":  0, "T":  0, "W": -2, "Y": -1, "V": -1, "B": -1, "Z": -1, "X": -1, "*": -4},
    8.28 +"*": {"A": -4, "R": -4, "N": -4, "D": -4, "C": -4, "Q": -4, "E": -4, "G": -4, "H": -4, "I": -4, "L": -4, "K": -4, "M": -4, "F": -4, "P": -4, "S": -4, "T": -4, "W": -4, "Y": -4, "V": -4, "B": -4, "Z": -4, "X": -4, "*":  1}
    8.29 +}
    8.30 +
    8.31 +gaps = (-8, -4, -2, -1)
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/allpy/data/codes.py	Tue Jan 25 16:03:00 2011 +0300
     9.3 @@ -0,0 +1,1209 @@
     9.4 +"""Tables of monomer codes.
     9.5 +
     9.6 +`dna`, `rna`, `protein` are lists of all known codes for monomers of given
     9.7 +type. Each of them is a list of tuples of kind:
     9.8 +
     9.9 +    ( 1-letter code, is-modified?, 3-letter-code, fullname )
    9.10 +
    9.11 +`3-letter-code` is the code used in PDB (it may actually be one or
    9.12 +two letters)
    9.13 +
    9.14 +"""
    9.15 +
    9.16 +dna = (
    9.17 +('A', False, "DA", "2'-DEOXYADENOSINE-5'-MONOPHOSPHATE"),
    9.18 +('C', False, "DC", "2'-DEOXYCYTIDINE-5'-MONOPHOSPHATE"),
    9.19 +('G', False, "DG", "2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"),
    9.20 +('T', False, "DT", "THYMIDINE-5'-MONOPHOSPHATE"),
    9.21 +('N', False, "", "Undefined DNA Nucleotide"),
    9.22 +('a', True, "0AM", "2'-deoxy-N-[3-(propyldisulfanyl)propyl]adenosine 5'-(dihydrogen phosphate)"),
    9.23 +('a', True, "0AV", "2'-O-methyladenosine 5'-(dihydrogen phosphate)"),
    9.24 +('a', True, "0SP", "2'-deoxy-N-[3-(propyldisulfanyl)propyl]adenosine 5'-(dihydrogen phosphate)"),
    9.25 +('a', True, "1AP", "2,6-DIAMINOPURINE"),
    9.26 +('a', True, "2AR", "2'-DEOXYARISTEROMYCIN-5'-PHOSPHATE"),
    9.27 +('a', True, "2BU", "(2S,3S)-N6-(2,3,4-TRIHYDROXYBUTYL)-2'-DEOXYADENOSINE MONO PHOSPHORIC ACID"),
    9.28 +('a', True, "2DA", "2',3'-DIDEOXYADENOSINE-5'-MONOPHOSPHATE"),
    9.29 +('a', True, "3DA", "3'-DEOXYADENOSINE-5'-MONOPHOSPHATE"),
    9.30 +('a', True, "5AA", "N6-DIMETHYL-3'-AMINO-ADENOSINE-5'-MONOPHOSPHATE"),
    9.31 +('a', True, "6HA", "1',5'-ANHYDRO-2',3'-DIDEOXY-2'-(ADENIN-9-YL)-6'-O-PHOSPHORYL-D-ARABINO-HEXITOL"),
    9.32 +('a', True, "7DA", "7-DEAZA-2'-DEOXYADENOSINE-5'-MONOPHOSPHATE"),
    9.33 +('a', True, "A34", "N6-METHYL DEOXYADENOSINE 5'-MONOPHOSPHATE"),
    9.34 +('a', True, "A35", "2-AMINO DEOXYADENOSINE 5'-MONOPHOSPHATE"),
    9.35 +('a', True, "A38", "8-OXY DEOXYADENOSINE-5'-MONOPHOSPHATE"),
    9.36 +('a', True, "A3A", "2'DEOXY-ALPHA-ANOMERIC-ADENOSINE-5'-PHOSPHATE"),
    9.37 +('a', True, "A40", "N2-METHYL 2'-DEOXYADENOSINE 5'-MONOPHOSPHATE"),
    9.38 +('a', True, "A43", "3'-AMINO DEOXYADENOSINE 5'-MONOPHOSPHATE"),
    9.39 +('a', True, "A47", "N6-METHOXY ADENOSINE 5'-MONOPHOSPHATE"),
    9.40 +('a', True, "ABR", "(R)-(N-PHENYL-2-HYDROXY-ETHYL)-2'-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"),
    9.41 +('a', True, "ABS", "(S)-(N-PHENYL-2-HYDROXY-ETHYL)-2'-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"),
    9.42 +('a', True, "AD2", "2'-DEOXY-ADENOSINE-3'-5'-DIPHOSPHATE"),
    9.43 +('a', True, "AP7", "N1-PROTONATED ADENOSINE-5'-MONOPHOSPHATE"),
    9.44 +('a', True, "APN", "2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-ADENINE"),
    9.45 +('a', True, "AS", "2-DEOXY-ADENOSINE -5'-THIO-MONOPHOSPHATE"),
    9.46 +('a', True, "E", "N-((-)-(7S,8R,9S,10R)[7,8,9-TRIHYDROXY-7,8,9,10-TETRA HYDROBENZO[A]PYREN-10-YL])-2'-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"),
    9.47 +('a', True, "E1X", "PHOSPHORIC"),
    9.48 +('a', True, "EDA", "3-[2-DEOXY-RIBOFURANOSYL]-3H-1,3,4,5A,8-PENTAAZA-AS-INDACENE-5'-MONOPHOSPHATE"),
    9.49 +('a', True, "FA2", "5-(6-AMINO-9H-PURIN-9-YL)-4-HYDROXYTETRAHYDROFURAN-3-YL"),
    9.50 +('a', True, "MA7", "1N-METHYLADENOSINE-5'-MONOPHOSPHATE"),
    9.51 +('a', True, "PRN", "PURINE 2'-DEOXYRIBO-5'-MONOPHOSPHATE"),
    9.52 +('a', True, "R", "2'-DEOXY-N6-(R)STYRENE OXIDE ADENOSINE MONOPHOSPHATE"),
    9.53 +('a', True, "RMP", "2'-DEOXY-ADENOSINE-5'-RP-MONOMETHYLPHOSPHONATE"),
    9.54 +('a', True, "S4A", "2'-deoxy-4'-thioadenosine 5'-(dihydrogen phosphate)"),
    9.55 +('a', True, "SMP", "2'-DEOXY-ADENOSINE-5'-SP-MONOMETHYLPHOSPHONATE"),
    9.56 +('a', True, "TCY", "(2R,3AS,4AR,5AR,5BS)-2-(6-AMINO-9H-PURIN-9-YL)-3A-HYDROXYHEXAHYDROCYCLOPROPA[4,5]CYCLOPENTA[1,2-B]FURAN-5A(4H)-YL"),
    9.57 +('a', True, "TFO", "[2-(6-AMINO-9H-PURIN-9-YL)-1-METHYLETHOXY]METHYLPHOSPHONIC"),
    9.58 +('a', True, "XAD", "9-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-9H-PURIN-6-AMINE"),
    9.59 +('a', True, "XAL", "[(1S,4R,6R)-6-HYDROXY-4-(ADENIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL"),
    9.60 +('a', True, "Y", "2'-DEOXY-N6-(S)STYRENE OXIDE ADENOSINE MONOPHOSPHATE"),
    9.61 +('c', True, "0AP", "2'-deoxycytidine 3',5'-bis(dihydrogen phosphate)"),
    9.62 +('c', True, "4PC", "3-(2'-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-3,7-DIHYDRO-2H-PYRROLO[2,3-D]PYRIMIDIN-2-ONE"),
    9.63 +('c', True, "4PD", "3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-1,3-DIHYDRO-2H-PYRROLO[2,3-D]PYRIMIDIN-2-ONE"),
    9.64 +('c', True, "4PE", "3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-3H-PYRROLO[2,3-D]PYRIMIDIN-2-OL"),
    9.65 +('c', True, "4SC", "4'-THIO-2'-DEOXYCYTIDINE-5'-MONOPHOSPHATE GROUP"),
    9.66 +('c', True, "5CM", "5-METHYL-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.67 +('c', True, "5FC", "5-FORMYL-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.68 +('c', True, "5NC", "5-AZA-CYTIDINE-5'MONOPHOSPHATE"),
    9.69 +('c', True, "5PC", "5(1-PROPYNYL)-2'-DEOXYCYTIDINE-5'-MONOPHOSPHATE"),
    9.70 +('c', True, "6HC", "1',5'-ANHYDRO-2',3'-DIDEOXY-2'-(CYTOSIN-1-YL)-6'-O-PHOSPHORYL-D-ARABINO-HEXITOL"),
    9.71 +('c', True, "C2S", "CYTIDINE-5'-DITHIOPHOSPHORATE"),
    9.72 +('c', True, "C32", "5-BROMO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.73 +('c', True, "C34", "N4-METHYL-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.74 +('c', True, "C36", "5-METHYL-5-FLUORO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.75 +('c', True, "C37", "5-FLUORO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.76 +('c', True, "C38", "5-IODO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.77 +('c', True, "C42", "3'-AMINO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.78 +('c', True, "C45", "N4-METHOXY-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.79 +('c', True, "C46", "6H,8H-3,4-DIHYDROPYRIMIDO[4,5-C][1,2]OXAZIN-7-0NE(CYTIDINE)-5'-MONOPHOSPHATE"),
    9.80 +('c', True, "C49", "4-THIO,5-FLUORO,5-METHYL-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.81 +('c', True, "C4S", "2'-deoxy-4'-thiocytidine 5'-(dihydrogen phosphate)"),
    9.82 +('c', True, "CAR", "CYTOSINE ARABINOSE-5'-PHOSPHATE"),
    9.83 +('c', True, "CB2", "PHOSPHORIC"),
    9.84 +('c', True, "CBR", "5-BROMO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.85 +('c', True, "CFL", "4-AMINO-1-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)PYRIMIDIN-2(1H)-ONE"),
    9.86 +('c', True, "CMR", "2'-DEOXY-CYTIDINE-5'-RP-MONOMETHYLPHOSPHONATE"),
    9.87 +('c', True, "CP1", "2-(METHYLAMINO)-ETHYLGLYCINE-CARBONYLMETHYLENE-CYTOSINE"),
    9.88 +('c', True, "CPN", "2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-CYTOSINE"),
    9.89 +('c', True, "CSL", "(D)-2'-METHYLSELENYL-2'-DEOXYCYTIDINE-5'-PHOSPHATE"),
    9.90 +('c', True, "DCT", "2',3'-DIDEOXYCYTIDINE 5'-TRIPHOSPHATE"),
    9.91 +('c', True, "DFC", "2'-DEOXY-L-RIBO-FURANOSYL CYTOSINE-5'-MONOPHOSPHATE"),
    9.92 +('c', True, "DNR", "2'-DEOXY-N3-PROTONATED CYTIDINE-5'-MONOPHOSPHATE"),
    9.93 +('c', True, "DOC", "2',3'-DIDEOXYCYTIDINE-5'-MONOPHOSPHATE"),
    9.94 +('c', True, "GCK", "PHOSPHORIC ACID 9-(2-GUANIDINOETHOXY-3-(2-DEOXY-BETA-D-ERYTHROPENTOFURANOSYL))-3H-PYRIMIDO-[5,4-B][1,4]-BENZOOXAZIN-2-ONE]-5'-ESTER"),
    9.95 +('c', True, "I5C", "5-IODO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
    9.96 +('c', True, "IMC", "N1-[2-DEOXY-RIBOFURANOSYL]-[2-AMINO-5-METHYL-4-OXO-4H-PYRIMIDINE]-5'-MONOPHOSPHATE"),
    9.97 +('c', True, "MCY", "5-METHYL-2'-DEOXYCYTIDINE"),
    9.98 +('c', True, "SC", "2-DEOXY-CYTIDINE-5'-THIOPHOSPHORATE"),
    9.99 +('c', True, "TC1", "3-(5-PHOSPHO-2-DEOXY-BETA-D-RIBOFURANOSYL)-2-OXO-1,3-DIAZA-PHENOTHIAZINE"),
   9.100 +('c', True, "TPC", "5'-THIO-2'-DEOXY-CYTOSINE PHOSPHONIC ACID"),
   9.101 +('c', True, "XCL", "[(1S,4R,6R)-6-HYDROXY-4-(CYTOSIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL"),
   9.102 +('c', True, "XCT", "4-AMINO-1-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)PYRIMIDIN-2(1H)-ONE"),
   9.103 +('c', True, "XCY", "{5-[4-{[4-(AMINOMETHYL)BENZYL]AMINO}-2-OXOPYRIMIDIN-1(2H)-"),
   9.104 +('c', True, "Z", "ZEBULARINE"),
   9.105 +('g', True, "0AD", "2'-deoxy-N-propylguanosine 5'-(dihydrogen phosphate)"),
   9.106 +('g', True, "2EG", "2'-DEOXY-N-ETHYLGUANOSINE 5'-PHOSPHATE"),
   9.107 +('g', True, "2PR", "2-AMINO-9-[2-DEOXYRIBOFURANOSYL]-9H-PURINE-5'-MONOPHOSPHATE"),
   9.108 +('g', True, "5CG", "5'-CHLORO-5'-DEOXY-GUANOSINE"),
   9.109 +('g', True, "6HG", "1',5'-ANHYDRO-2',3'-DIDEOXY-2'-(GUANIN-9-YL)-6'-O-PHOSPHORYL-D-ARABINO-HEXITOL"),
   9.110 +('g', True, "6OG", "6-O-METHYL GUANOSINE-5'-MONOPHOSPHATE"),
   9.111 +('g', True, "7GU", "7-DEAZA-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"),
   9.112 +('g', True, "8FG", "N-(5'-PHOSPHO-2'-DEOXYGUANOSIN-8-YL)-2-ACETYLAMINOFLUORENE"),
   9.113 +('g', True, "8MG", "8-METHYL-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"),
   9.114 +('g', True, "8OG", "8-OXO-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"),
   9.115 +('g', True, "AFG", "N-(5'-PHOSPHO-2'-DEOXYGUANOSIN-8-YL)-2-AMINOFLUORENE"),
   9.116 +('g', True, "BGM", "8-BROMO-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"),
   9.117 +('g', True, "DCG", "2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"),
   9.118 +('g', True, "DDG", "2',3'-DIDEOXY-GUANOSINE-5'-MONOPHOSPHATE"),
   9.119 +('g', True, "DFG", "2'-DEOXY-L-RIBO-FURANOSYL GUANINE-5'-MONOPHOSPHATE"),
   9.120 +('g', True, "DGI", "2'-DEOXYGUANOSINE-5'-DIPHOSPHATE"),
   9.121 +('g', True, "EDC", "N3,N4-ETHENO-2'-DEOXYCYTIDINE-5'-MONOPHOSPHATE"),
   9.122 +('g', True, "FMG", "2-AMINO-9-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)-7-METHYL-6-OXO-6,9-DIHYDRO-1H-PURIN-7-IUM"),
   9.123 +('g', True, "FOX", "((1R,2S,4R)-4-{[2-AMINO-5-(FORMYLAMINO)-6-OXO-3,6-DIHYDROPYRIMIDIN-4-YL]AMINO}-2-HYDROXYCYCLOPENTYL)METHYL 5'-PHOSPHATE"),
   9.124 +('g', True, "G2S", "GUANOSINE-5'-DITHIOPHOSPHORATE"),
   9.125 +('g', True, "G31", "3'-METHYL-2',3'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"),
   9.126 +('g', True, "G32", "O6-METHYL-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"),
   9.127 +('g', True, "G33", "8-METHYL-2'-DEOXYGUANOSINE 3'-MONOPHOSPHATE"),
   9.128 +('g', True, "G36", "O6-ETHYL-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"),
   9.129 +('g', True, "G38", "3'-AMINO-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"),
   9.130 +('g', True, "G42", "8-OXO-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"),
   9.131 +('g', True, "G47", "N2-ETHANETHIOL-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"),
   9.132 +('g', True, "G49", "N2-METHYL-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"),
   9.133 +('g', True, "GDR", "GUANOSINE-5'-DIPHOSPHATE-RHAMNOSE"),
   9.134 +('g', True, "GFL", "2-AMINO-9-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)-1,9-DIHYDRO-6H-PURIN-6-ONE"),
   9.135 +('g', True, "GMS", "2'-DEOXYGUANOSINE-5'-MONOSELENOPHOSPHATE"),
   9.136 +('g', True, "GN7", "N7-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"),
   9.137 +('g', True, "GPN", "2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-GUANINE"),
   9.138 +('g', True, "GS", "GUANOSINE-5'-THIO-MONOPHOSPHATE"),
   9.139 +('g', True, "GSR", "2'-DEOXY-N2-(R)STYRENE OXIDE GUANOSINE MONOPHOSPHATE"),
   9.140 +('g', True, "GSS", "2'-DEOXY-N2-(S)STYRENE OXIDE GUANOSINE MONOPHOSPHATE"),
   9.141 +('g', True, "IGU", "2'-DEOXYISOGUANINE-5'-MONOPHOSPHATE"),
   9.142 +('g', True, "LCG", "[(1R,3R,4R,7S)-7-HYDROXY-3-(GUANIN-9-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"),
   9.143 +('g', True, "LGP", "N9-1-HYDROXY-PROP-2-OXYMETHYL-GUANINE-3'-MONOPHOSPHATE"),
   9.144 +('g', True, "M1G", "3-(2-DEOXY-BETA-D-RIBOFURANOSYL)-PYRIDO[5,6-A]-PURINE-10-ONE-5'-MONOPHOSPHATE"),
   9.145 +('g', True, "MG1", "2'-DEOXY-1-METHYLGUANOSINE 5'-(DIHYDROGEN PHOSPHATE)"),
   9.146 +('g', True, "MRG", "N2-(3-MERCAPTOPROPYL)-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"),
   9.147 +('g', True, "P", "2'-DEOXY-N1,N2-PROPANO GUANOSINE MONOPHOSPHATE"),
   9.148 +('g', True, "PG7", "((2R,3R,5R)-5-(2-AMINO-6-HYDROXY-9H-PURIN-9-YL)-3-HYDROXY-TETRAHYDROFURAN-2-YL)METHYL"),
   9.149 +('g', True, "PGN", "2'-DEOXYGUANOSINE-3',5'-DIPHOSPHATE"),
   9.150 +('g', True, "PPW", "7-DEAZA-8-AZA-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"),
   9.151 +('g', True, "S4G", "2'-deoxy-4'-thioguanosine 5'-(dihydrogen phosphate)"),
   9.152 +('g', True, "S6G", "6-THIO-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"),
   9.153 +('g', True, "SDG", "2-AMINO-9-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-9H-PURINE-6-SELENOL"),
   9.154 +('g', True, "TGP", "5'-THIO-2'-DEOXY-GUANOSINE PHOSPHONIC ACID"),
   9.155 +('g', True, "X", "2'-DEOXY-N7-(8,9-DIHYDRO-9-HYDROXY-10-DEHYDROXY-AFLATOXIN)GUANOSINE MONOPHOSPHATE"),
   9.156 +('g', True, "XGL", "[(1S,4R,6R)-6-HYDROXY-4-(GUANIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL"),
   9.157 +('g', True, "XGU", "2-AMINO-9-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-1,9-DIHYDRO-6H-PURIN-6-ONE"),
   9.158 +('g', True, "XUG", "2'-SE-METHYL-2'-SELENOGUANOSINE 5'-(DIHYDROGEN PHOSPHATE)"),
   9.159 +('n', True, "0AU", "5-iodouridine 5'-(dihydrogen phosphate)"),
   9.160 +('n', True, "2BD", "N1-(1-HYDROXY-3-BUTEN-2(S)-YL)-2'-DEOXYINOSINE MONO PHOSPHORIC ACID"),
   9.161 +('n', True, "2DF", "N-(2-DEOXY-BETA-D-ERYTHO-PENTOFURANOSYL-5-PHOSPHATE)"),
   9.162 +('n', True, "2DM", "2-HYDROXY-3-(PYREN-1-YLMETHOXY)PROPYL"),
   9.163 +('n', True, "2FE", "2'-FLUORO-2'-DEOXY-1,N6-ETHENOADENINE"),
   9.164 +('n', True, "2FI", "2'-FLUORO-2'-DEOXYINOSINE"),
   9.165 +('n', True, "3DR", "1',2'-DIDEOXYRIBOFURANOSE-5'-PHOSPHATE"),
   9.166 +('n', True, "3ME", "PHOSPHORIC"),
   9.167 +('n', True, "4MF", "1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-4-METHYL-1H-INDOLE"),
   9.168 +('n', True, "5HU", "5-HYDROXYMETHYLURIDINE-2'-DEOXY-5'-MONOPHOSPHATE"),
   9.169 +('n', True, "5IU", "5-IODO-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"),
   9.170 +('n', True, "5MD", "5-METHYL-2'-DEOXYPSEUDOURIDINE"),
   9.171 +('n', True, "6MI", "6-METHYL-8-(2-DEOXY-RIBOFURANOSYL)ISOXANTHOPTERIDINE"),
   9.172 +('n', True, "A1P", "9-{2-DEOXY-5-O-[HYDROXY(OXIDO)PHOSPHINO]-BETA-L-ERYTHRO-PENTOFURANOSYL}-9H-PURIN-2-AMINE"),
   9.173 +('n', True, "ABT", "3'-AZIDO-3'-DEOXY-THYMIDINE-5'-ALPHA BORANO TRIPHOSPHATE"),
   9.174 +('n', True, "AFF", "2-ACETYLAMINOFLUORENE-3-YL"),
   9.175 +('n', True, "ASU", "4'-THIO-2'4'-DIDEOXYRIBOFURANOSE-5'-PHOSPHATE"),
   9.176 +('n', True, "B1P", "2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSE"),
   9.177 +('n', True, "BRU", "5-BROMO-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"),
   9.178 +('n', True, "BVP", "(E)-5-(2-BROMOVINYL)-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"),
   9.179 +('n', True, "BZG", "6-(BENZYLOXY)-9-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-9H-PURIN-2-AMINE"),
   9.180 +('n', True, "D1P", "2'-DEOXY-RIBOFURANOSE-5'-PHOSPHATE"),
   9.181 +('n', True, "D3", "1-(2-DEOXY-BETA-D-RIBOFURANOSYL)-4-(3-BENZAMIDO)PHENYLIMIDAZOLE"),
   9.182 +('n', True, "DDN", "3,4-DIHYDRO-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"),
   9.183 +('n', True, "DDX", "2',3'-DEHYDRO-2',3'-DIDEOXYRIBOFURANOSE-5'-PHOSPHATE"),
   9.184 +('n', True, "DFT", "1-[2-DEOXYRIBOFURANOSYL]-2,4-DIFLUORO-5-METHYL-BENZENE-5'MONOPHOSPHATE"),
   9.185 +('n', True, "DI", "2'-DEOXYINOSINE-5'-MONOPHOSPHATE"),
   9.186 +('n', True, "DPY", "2-DEOXYRIBOFURANOSYL-PYRIDINE-2,6-DICARBOXYLIC ACID-5'-MONOPHOSPHATE"),
   9.187 +('n', True, "DRM", "{[(1R,2S)-2-(2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)CYCLOPENTYL]OXY}METHYLPHOSPHONIC"),
   9.188 +('n', True, "DRP", "2-DEOXYRIBOFURANOSYL-PYRIDINE-5'-MONOPHOSPHATE"),
   9.189 +('n', True, "DRZ", "3',4'-DIHYDROXY-PENTANAL-5'-PHOSPHATE"),
   9.190 +('n', True, "DU", "2'-DEOXYURIDINE-5'-MONOPHOSPHATE"),
   9.191 +('n', True, "DXD", "(1S,3S,4R)-4-(PHOSPHOOXYMETHYL)-CYCLOPENTANE-1,3-DIOL"),
   9.192 +('n', True, "DXN", "(1R,3S,4R)-4-(PHOSPHOOXYMETHYL)-CYCLOPENTANE-1,3-DIOL"),
   9.193 +('n', True, "FAG", "[1',2'-DIDEOXY[2-AMINO-5-([9-HYDROXY-AFLATOXINB2-8-YL]-FORMYL-AMINO)-6-OXO-1,6-IHYDRO-PYRIMIDIN-4-YLAMINO]-RIBOFURANOSE]-5-MONOPHOSPHATE GROUP"),
   9.194 +('n', True, "FFD", "(1R)-1,4-ANHYDRO-2-DEOXY-1-(3-FLUOROPHENYL)-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL"),
   9.195 +('n', True, "GMU", "2'-O-[(2-GUANIDINIUM)ETHYL]-5-METHYLURIDINE 5'-MONOPHOSPHATE"),
   9.196 +('n', True, "GNE", "1,N2-ETHENOGUANINE"),
   9.197 +('n', True, "HDP", "[(1S,6S)-6-HYDROXY-4-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)CYCLOHEX-2-EN-1-YL]METHYL"),
   9.198 +('n', True, "HEU", "3-(2-HYDROXYETHYL)-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"),
   9.199 +('n', True, "HOB", "CHOLEST-5-EN-3-YL"),
   9.200 +('n', True, "HOL", "CHOLEST-5-EN-3-YL"),
   9.201 +('n', True, "IPN", "2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-5-IODOURACIL"),
   9.202 +('n', True, "LCC", "[(1R,3R,4R,7S)-7-HYDROXY-3-(5-METHYLCYTOSIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"),
   9.203 +('n', True, "LCH", "[(1R,3R,4R,7S)-7-HYDROXY-3-(5-METHYLCYTOSIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"),
   9.204 +('n', True, "LKC", "4-AMINO-1-[(1S,3R,4R,7S)-7-HYDROXY-1-(HYDROXYMETHYL)-2,5-DIOXABICYCLO[2.2.1]HEPT-3-YL]-5-METHYLPYRIMIDIN-2(1H)-ONE"),
   9.205 +('n', True, "MBZ", "1-[2-DEOXYRIBOFURANOSYL]-4-METHYL-BENZOIMIDAZOLE-5'-MONOPHOSPHATE"),
   9.206 +('n', True, "MDR", "9-(2-DEOXY-BETA-D-RIBOFURANOSYL)-6-METHYLPURINE"),
   9.207 +('n', True, "N5I", "1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-NITRO-1H-INDOLE"),
   9.208 +('n', True, "NCX", "1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-NITRO-1H-INDOLE-3-CARBOXAMIDE"),
   9.209 +('n', True, "NDN", "2'-DEOXY-5-NITROURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
   9.210 +('n', True, "NP3", "1-[2-DEOXY-RIBOFURANOSYL]-1H-[3-NITRO-PYRROL]-5'-PHOSPHATE"),
   9.211 +('n', True, "NYM", "3'-DEOXY-3'-AMINOTHYMIDINE MONOPHOSPHATE"),
   9.212 +('n', True, "O2C", "3'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"),
   9.213 +('n', True, "OIP", "2'-DEOXY-INOSINIC ACID"),
   9.214 +('n', True, "P2U", "2'-DEOXY-PSEUDOURIDINE-5'MONOPHOSPHATE"),
   9.215 +('n', True, "PBT", "[3-HYDROXY-5-(5-METHYL-2,4-DIOXOTETRAHYDRO-1(2H)-PYRIMIDINYL)TETRAHYDRO-2-FURANYL]METHYL"),
   9.216 +('n', True, "PDU", "5(1-PROPYNYL)-2'-DEOXYURIDINE-5-MONOPHOSPHATE"),
   9.217 +('n', True, "T", "THYMIDINE-5'-MONOPHOSPHATE"),
   9.218 +('n', True, "T2T", "[(2S,3S,5R)-3-[(2S)-3-({[(2R,3S,4R,5R)-3-HYDROXY-4-METHOXY-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)TETRAHYDROFURAN-2-YL]METHYL}AMINO)-2-METHYL-3-OXOPROPYL]-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)TETRAHYDROFURAN-2-YL]METHYL DIHYDROGEN PHOSPHATE"),
   9.219 +('n', True, "THX", "PHOSPHONIC ACID 6-({6-[6-(6-CARBAMOYL-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONYL)-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDOCENE-2-CARBONYL]-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONL}-AMINO)-HEXYL ESTER 5-(5-METHYL-2,4-DIOXO-3,4-DIHYDRO-2H-PYRIMIDIN-1-YL)-TETRAHYDRO-FURAN-2-YLMETHYL ESTER"),
   9.220 +('n', True, "TLN", "[(1R,3R,4R,7S)-7-HYDROXY-3-(THYMIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"),
   9.221 +('n', True, "TS", "THYMIDINE-5'-THIOPHOSPHATE"),
   9.222 +('n', True, "TT", "[(1R,3R,4S,9R,10S,12R,15AS,15BR,18BR,18CS)-10-HYDROXY-15A,15B-DIMETHYL-13,15,16,18-TETRAOXOHEXADECAHYDRO-8H-9,12-EPOXY-1,4-METHANO-2,5,7-TRIOXA-12A,14,17,18A-TETRAAZACYCLOHEXADECA[1,2,3,4-DEF]BIPHENYLEN-3-YL]METHYL DIHYDROGEN PHOSPHATE"),
   9.223 +('n', True, "U2N", "2'-AMINO-2'-DEOXYURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
   9.224 +('n', True, "U33", "5-BROMO-2'-DEOXY URIDINE"),
   9.225 +('n', True, "UCL", "5-CHLORO-2'-DEOXYURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
   9.226 +('n', True, "UFP", "5-FLUORO-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"),
   9.227 +('n', True, "UFR", "2'-DEOXY-5-FORMYLURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
   9.228 +('n', True, "UFT", "2'-deoxy-2'-fluorouridine 5'-(dihydrogen phosphate)"),
   9.229 +('n', True, "UMS", "2'-METHYLSELENYL-2'-DEOXYURIDINE-5'-PHOSPHATE"),
   9.230 +('n', True, "US1", "2'-DEOXY-3'-THIOURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
   9.231 +('n', True, "X4A", "[(2R,3S,5S)-2,3,5-TRIHYDROXYTETRAHYDROFURAN-2-YL]METHYL"),
   9.232 +('n', True, "XAE", "3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-3H-IMIDAZO[4,5-G]QUINAZOLIN-8-AMINE"),
   9.233 +('n', True, "XAR", "[(1R,4S,6S)-4-(6-AMINO-9H-PURIN-9-YL)-6-HYDROXYCYCLOHEX-2-EN-1-YL]METHYL"),
   9.234 +('n', True, "XCS", "(1R)-1-(4-AMINO-6-METHYL-2-OXO-1,2-DIHYDROQUINAZOLIN-8-YL)-1,4-ANHYDRO-2-DEOXY-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL"),
   9.235 +('n', True, "XGA", "6-AMINO-3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-3,7-DIHYDRO-8H-IMIDAZO[4,5-G]QUINAZOLIN-8-ONE"),
   9.236 +('n', True, "XTY", "(1R)-1,4-ANHYDRO-2-DEOXY-1-(6-METHYL-2,4-DIOXO-1,2,3,4-TETRAHYDROQUINAZOLIN-8-YL)-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL"),
   9.237 +('n', True, "YRR", "3-HYDROXY-PYRROLIDIN-2-YLMETHYL-MONOPHOSPHATE"),
   9.238 +('n', True, "ZDU", "5-(3-AMINOPROPYL)-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"),
   9.239 +('t', True, "2AT", "2'-O-ALLYL THYMIDINE-5'-MONOPHOSPHATE"),
   9.240 +('t', True, "2BT", "2'-O-BUTYL-THYMIDINE"),
   9.241 +('t', True, "2DT", "3'-DEOXYTHYMIDINE-5'-MONOPHOSPHATE"),
   9.242 +('t', True, "2GT", "2'-O-PROPARGYL THYMIDINE-5'-MONOPHOSPHATE"),
   9.243 +('t', True, "2NT", "2'-O-[2-[HYDROXY(METHYLENEAMINO)OXY]ETHYL THYMIDINE-5'-MONOPHOSPHATE"),
   9.244 +('t', True, "2OT", "2'-O-[2-(N,N-DIMETHYLAMINOOXY)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"),
   9.245 +('t', True, "2ST", "5-METHYL-2'-SE-METHYL-2'-SELENOURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
   9.246 +('t', True, "5AT", "5'-AMINO-5'-DEOXYTHYMIDINE"),
   9.247 +('t', True, "5HT", "5-HYDROXY-THYMIDINE"),
   9.248 +('t', True, "5IT", "5-IODO-THYMIDINE-5'-PHOSPHATE"),
   9.249 +('t', True, "5PY", "1-(2'-DEOXY-5'-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-METHYLPYRIMIDIN-2(1H)-ONE"),
   9.250 +('t', True, "64T", "5-HYDROXY-THYMIDINE-5'-MONOPHOSPHATE"),
   9.251 +('t', True, "6CT", "PHOSPHORIC"),
   9.252 +('t', True, "6HT", "1',5'-ANHYDRO-2',3'-DIDEOXY-2'-(THYMIN-1-YL)-6'-O-PHOSPHORYL-D-ARABINO-HEXITOL"),
   9.253 +('t', True, "ATD", "THYMIDINE-3'-PHOSPHATE"),
   9.254 +('t', True, "ATL", "[(1S,3R,4S,7R)-7-HYDROXY-3-(THYMIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"),
   9.255 +('t', True, "ATM", "3'-AZIDO-3'-DEOXYTHYMIDINE-5'-MONOPHOSPHATE"),
   9.256 +('t', True, "BOE", "2'-O-[2-(BENZYLOXY)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"),
   9.257 +('t', True, "CTG", "(5R,6S)-5,6-DIHYDRO-5,6-DIHYDROXYTHYMIDINE-5'-MONOPHOSPHATE"),
   9.258 +('t', True, "D3T", "2',3'-DIDEOXY-THYMIDINE-5'-TRIPHOSPHATE"),
   9.259 +('t', True, "D4M", "[(5R)-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)-2,5-DIHYDROFURAN-2-YL]METHYL"),
   9.260 +('t', True, "DPB", "(S)-1-[2'-DEOXY-3',5'-O-(1-PHOSPHONO)BENZYLIDENE-B-D-THREO-PENTOFURANOSYL]THYMINE"),
   9.261 +('t', True, "DRT", "2'-DEOXY-L-RIBO-FURANOSYL THYMINE-5'-MONOPHOSPHATE"),
   9.262 +('t', True, "EIT", "((3R,4R,5R)-4-(2-(1H-IMIDAZOL-1-YL)ETHOXY)-3-HYDROXY-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)-TETRAHYDROFURAN-2-YL)METHYL"),
   9.263 +('t', True, "MMT", "5'-O-(DIMETHYLAMINO)-THYMIDINE"),
   9.264 +('t', True, "MTR", "(5-METHYL-6-OXO-1,6-DIHYDRO-PYRIDIN-3-YL)-1,2-DIDEOXY-RIBOFURANOSE-5-MONOPHOSPHATE"),
   9.265 +('t', True, "NMS", "1-(O2-(2-METHYLAMINO-2-OXO-ETHYL)-O5-HYDROXYPHOSPHINYL-BETA-D-RIBOFURANOSYL)THYMINE"),
   9.266 +('t', True, "NMT", "1-(O2-(METHYLCARBAMOYL)-O5-HYDROXYPHOSPHINYL-BETA-D-RIBOFURANOSYL)THYMINE"),
   9.267 +('t', True, "P2T", "2'-O-PROPYL THYMIDINE-5-MONOPHOSPHATE"),
   9.268 +('t', True, "PST", "THYMIDINE-5'-THIOPHOSPHATE"),
   9.269 +('t', True, "S2M", "2'-O-[2-(METHOXY)ETHYL]-2-THIOTHYMIDINE-5'-MONOPHOSPHATE"),
   9.270 +('t', True, "SPT", "5'-THIO-THYMIDINE PHOSPHONIC ACID"),
   9.271 +('t', True, "T32", "6'-ALPHA-METHYL CARBOCYCLIC THYMIDINE 5'-MONOPHOSPHATE"),
   9.272 +('t', True, "T36", "SPLIT LINKAGE THYMIDINE 5'-MONOPHOSPHATE"),
   9.273 +('t', True, "T37", "3'-AMINO-2'DEOXYTHYMIDINE 5'-MONOPHOSPHATE"),
   9.274 +('t', True, "T3P", "THYMIDINE-3'-PHOSPHATE"),
   9.275 +('t', True, "T48", "6'-ALPHA-HYDROXY CARBOCYCLIC THYMIDINE 5'-MONOPHOSPHATE"),
   9.276 +('t', True, "T49", "S4'-2'DEOXYTHYMIDINE 5'-MONOPHOSPHATE"),
   9.277 +('t', True, "T4S", "1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-4-HYDROSELENO-5-METHYLPYRIMIDIN-2(1H)-ONE"),
   9.278 +('t', True, "T5S", "2'-deoxy-5-(methylselanyl)uridine 5'-phosphate"),
   9.279 +('t', True, "TA3", "(4S,5R)-3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-METHYL-1,3-DIAZABICYCLO[2.2.0]HEXAN-2-ONE"),
   9.280 +('t', True, "TAF", "2'-DEOXY-2'-FLUORO-ARABINO-FURANOSYL THYMINE-5'-PHOSPHATE"),
   9.281 +('t', True, "TCP", "5'-METHYLTHYMIDINE"),
   9.282 +('t', True, "TFE", "2'-O-[2-(TRIFLUORO)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"),
   9.283 +('t', True, "TFT", "(L)-ALPHA-THREOFURANOSYL-THYMINE-3'-MONOPHOSPHATE"),
   9.284 +('t', True, "TLC", "2-O,3-ETHDIYL-ARABINOFURANOSYL-THYMINE-5'-MONOPHOSPHATE"),
   9.285 +('t', True, "TP1", "2-(METHYLAMINO)-ETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE"),
   9.286 +('t', True, "TPN", "2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE"),
   9.287 +('t', True, "TTD", "CIS-SYN"),
   9.288 +('t', True, "TTM", "N3-ETHYL-THYMIDINE-5'-MONOPHOSPHATE"),
   9.289 +('t', True, "XTH", "1-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-5-METHYLPYRIMIDINE-2,4(1H,3H)-DIONE"),
   9.290 +('t', True, "XTL", "[(1S,4R,6R)-6-HYDROXY-4-(THYMIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL"),
   9.291 +)
   9.292 +
   9.293 +protein = (
   9.294 +('A', False, "ALA", "ALANINE"),
   9.295 +('C', False, "CYS", "CYSTEINE"),
   9.296 +('D', False, "ASP", "ASPARTIC"),
   9.297 +('E', False, "GLU", "GLUTAMIC"),
   9.298 +('F', False, "PHE", "PHENYLALANINE"),
   9.299 +('G', False, "GLY", "GLYCINE"),
   9.300 +('H', False, "HIS", "HISTIDINE"),
   9.301 +('I', False, "ILE", "ISOLEUCINE"),
   9.302 +('K', False, "LYS", "LYSINE"),
   9.303 +('L', False, "LEU", "LEUCINE"),
   9.304 +('M', False, "MET", "METHIONINE"),
   9.305 +('N', False, "ASN", "ASPARAGINE"),
   9.306 +('P', False, "PRO", "PROLINE"),
   9.307 +('Q', False, "GLN", "GLUTAMINE"),
   9.308 +('R', False, "ARG", "ARGININE"),
   9.309 +('S', False, "SER", "SERINE"),
   9.310 +('T', False, "THR", "THREONINE"),
   9.311 +('V', False, "VAL", "VALINE"),
   9.312 +('W', False, "TRP", "TRYPTOPHAN"),
   9.313 +('X', False, "", "Undefined Aminoacid"),
   9.314 +('Y', False, "TYR", "TYROSINE"),
   9.315 +('a', True, "0CS", "3-[(S)-HYDROPEROXYSULFINYL]-L-ALANINE"),
   9.316 +('a', True, "0NC", "N-METHYL-L-ALANINAMIDE"),
   9.317 +('a', True, "AA3", "2-AMINOBUTYRIC"),
   9.318 +('a', True, "AA4", "2-AMINO-5-HYDROXYPENTANOIC"),
   9.319 +('a', True, "ABA", "ALPHA-AMINOBUTYRIC"),
   9.320 +('a', True, "AHO", "N-ACETYL-N-HYDROXY-L-ORNITHINE"),
   9.321 +('a', True, "AHP", "2-AMINO-HEPTANOIC"),
   9.322 +('a', True, "AIB", "ALPHA-AMINOISOBUTYRIC"),
   9.323 +('a', True, "ALC", "2-AMINO-3-CYCLOHEXYL-PROPIONIC"),
   9.324 +('a', True, "ALM", "1-METHYL-ALANINAL"),
   9.325 +('a', True, "ALN", "NAPHTHALEN-2-YL-3-ALANINE"),
   9.326 +('a', True, "ALS", "2-AMINO-3-OXO-4-SULFO-BUTYRIC"),
   9.327 +('a', True, "ALT", "THIOALANINE"),
   9.328 +('a', True, "APH", "P-AMIDINOPHENYL-3-ALANINE"),
   9.329 +('a', True, "AYA", "N-ACETYLALANINE"),
   9.330 +('a', True, "B2A", "ALANINE"),
   9.331 +('a', True, "B3A", "(3S)-3-AMINOBUTANOIC"),
   9.332 +('a', True, "BAL", "BETA-ALANINE"),
   9.333 +('a', True, "BNN", "ACETYL-P-AMIDINOPHENYLALANINE"),
   9.334 +('a', True, "CAB", "4-CARBOXY-4-AMINOBUTANAL"),
   9.335 +('a', True, "CLB", "D-PARA-CHLOROPHENYL-1-ACETAMIDOBORONIC"),
   9.336 +('a', True, "CLD", "D-PARA-CHLOROPHENYL-1-ACTEAMIDOBORONIC"),
   9.337 +('a', True, "DAB", "2,4-DIAMINOBUTYRIC"),
   9.338 +('a', True, "DAL", "D-ALANINE"),
   9.339 +('a', True, "DBU", "(2E)-2-AMINOBUT-2-ENOIC"),
   9.340 +('a', True, "DBZ", "3-(BENZOYLAMINO)-L-ALANINE"),
   9.341 +('a', True, "DHA", "2-AMINO-ACRYLIC"),
   9.342 +('a', True, "DNP", "3-AMINO-ALANINE"),
   9.343 +('a', True, "DPP", "DIAMMINOPROPANOIC"),
   9.344 +('a', True, "FLA", "TRIFLUOROALANINE"),
   9.345 +('a', True, "HAC", "BETA-CYCLOHEXYL-ALANINE"),
   9.346 +('a', True, "HMF", "2-AMINO-4-PHENYL-BUTYRIC"),
   9.347 +('a', True, "HV5", "TERT-BUTYLALANINE"),
   9.348 +('a', True, "IAM", "4-[(ISOPROPYLAMINO)METHYL]PHENYLALANINE"),
   9.349 +('a', True, "KYN", "KYNURENINE"),
   9.350 +('a', True, "LAL", "N,N-DIMETHYL-L-ALANINE"),
   9.351 +('a', True, "MA", "METHYL"),
   9.352 +('a', True, "MAA", "N-METHYLALANINE"),
   9.353 +('a', True, "MSP", "5'-O-[(L-METHIONYL)-SULPHAMOYL]ADENOSINE"),
   9.354 +('a', True, "NAL", "BETA-(2-NAPHTHYL)-ALANINE"),
   9.355 +('a', True, "NAM", "NAM"),
   9.356 +('a', True, "NCB", "N-CARBAMOYL-ALANINE"),
   9.357 +('a', True, "ORN", "ORNITHINE"),
   9.358 +('a', True, "PAU", "PANTOTHENOIC"),
   9.359 +('a', True, "PRR", "3-(METHYL-PYRIDINIUM)ALANINE"),
   9.360 +('a', True, "PYA", "3-(1,10-PHENANTHROL-2-YL)-L-ALANINE"),
   9.361 +('a', True, "S2P", "(2S)-2-AMINO-3-(4-HYDROXY-1,2,5-THIADIAZOL-3-YL)PROPANOIC"),
   9.362 +('a', True, "SEC", "2-AMINO-3-SELENINO-PROPIONIC"),
   9.363 +('a', True, "SEG", "HYDROXYALANINE"),
   9.364 +('a', True, "TIH", "BETA(2-THIENYL)ALANINE"),
   9.365 +('a', True, "UMA", "URIDINE-5'-DIPHOSPHATE-N-ACETYLMURAMOYL-L-ALANINE"),
   9.366 +('c', True, "0A8", "S-[(2-CHLOROETHYL)CARBAMOYL]-L-CYSTEINE"),
   9.367 +('c', True, "143", "S-2,3-DIHYDRO-5-GLYCIN-2-YL-ISOXAZOL-3-YL-CYSTEINE"),
   9.368 +('c', True, "2CO", "S-HYDROPEROXYCYSTEINE"),
   9.369 +('c', True, "5CS", "2-AMINO-3-(CYSTEIN-S-YL)-ISOXAZOLIDIN-5-YL-ACETIC"),
   9.370 +('c', True, "BBC", "3-[(4-AMINOBUTYL)SULFINYL]-2-IMINOPROPAN-1-OL"),
   9.371 +('c', True, "BCS", "BENZYLCYSTEINE"),
   9.372 +('c', True, "BCX", "BETA-3-CYSTEINE"),
   9.373 +('c', True, "BPE", "(2S)-2-AMINO-3-[(3-AMINOPROPYL)SULFANYL]PROPAN-1-OL"),
   9.374 +('c', True, "BTC", "CYSTEINE"),
   9.375 +('c', True, "BUC", "S,S-BUTYLTHIOCYSTEINE"),
   9.376 +('c', True, "C3Y", "S-[(1S)-1-HYDROXY-1-(HYDROXYAMINO)ETHYL]-L-CYSTEINE"),
   9.377 +('c', True, "C5C", "S-CYCLOPENTYL"),
   9.378 +('c', True, "C6C", "S-CYCLOHEXYL"),
   9.379 +('c', True, "CAF", "S-DIMETHYLARSINOYL-CYSTEINE"),
   9.380 +('c', True, "CAS", "S-(DIMETHYLARSENIC)CYSTEINE"),
   9.381 +('c', True, "CAY", "CARBOXYMETHYLENECYSTEINE"),
   9.382 +('c', True, "CCS", "CARBOXYMETHYLATED"),
   9.383 +('c', True, "CEA", "S-HYDROXY-CYSTEINE"),
   9.384 +('c', True, "CME", "S,S-(2-HYDROXYETHYL)THIOCYSTEINE"),
   9.385 +('c', True, "CMH", "S-(METHYLMERCURY)-L-CYSTEINE"),
   9.386 +('c', True, "CML", "(2S)-2-{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}BUTANEDIOIC"),
   9.387 +('c', True, "CMT", "O-METHYLCYSTEINE"),
   9.388 +('c', True, "CS0", "S-(2-HYDROXYETHYL)-L-CYSTEINE"),
   9.389 +('c', True, "CS1", "S-(2-ANILINYL-SULFANYL)-CYSTEINE"),
   9.390 +('c', True, "CS3", "S-[3-OXO-3-(2-THIENYL)PROPYL]-L-CYSTEINE"),
   9.391 +('c', True, "CS4", "S-[3-(3,4-DICHLOROPHENYL)-3-OXOPROPYL]-L-CYSTEINE"),
   9.392 +('c', True, "CSA", "S-ACETONYLCYSTEINE"),
   9.393 +('c', True, "CSB", "CYS"),
   9.394 +('c', True, "CSD", "3-SULFINOALANINE"),
   9.395 +('c', True, "CSE", "SELENOCYSTEINE"),
   9.396 +('c', True, "CSO", "S-HYDROXYCYSTEINE"),
   9.397 +('c', True, "CSP", "S-PHOSPHOCYSTEINE"),
   9.398 +('c', True, "CSR", "S-ARSONOCYSTEINE"),
   9.399 +('c', True, "CSS", "S-MERCAPTOCYSTEINE"),
   9.400 +('c', True, "CSU", "CYSTEINE-S-SULFONIC"),
   9.401 +('c', True, "CSW", "CYSTEINE-S-DIOXIDE"),
   9.402 +('c', True, "CSX", "S-OXY"),
   9.403 +('c', True, "CSZ", "S-SELANYL"),
   9.404 +('c', True, "CY0", "S-{3-[(4-ANILINOQUINAZOLIN-6-YL)AMINO]-3-OXOPROPYL}-L-CYSTEINE"),
   9.405 +('c', True, "CY1", "ACETAMIDOMETHYLCYSTEINE"),
   9.406 +('c', True, "CY3", "2-AMINO-3-MERCAPTO-PROPIONAMIDE"),
   9.407 +('c', True, "CY4", "S-BUTYRYL-CYSTEIN"),
   9.408 +('c', True, "CYA", "TWO"),
   9.409 +('c', True, "CYD", "2-AMINO-6-(CYSTEIN-S-YL)-5-OXO-HEXANOIC"),
   9.410 +('c', True, "CYF", "5-[2-(2-AMINO-2-CARBAMOYL-ETHYLSULFANYL)-ACETYLAMINO]-2-(3,6-DIHYDROXY-9,9A-DIHYDRO-3H-XANTHEN-9-YL)-BENZOIC"),
   9.411 +('c', True, "CYG", "2-AMINO-4-(AMINO-3-OXO-PROPYLSULFANYLCARBONYL)-BUTYRIC"),
   9.412 +('c', True, "CYM", "S-METHYLCYSTEINE"),
   9.413 +('c', True, "CYQ", "2-AMINO-3-PHOSPHONOMETHYLSULFANYL-PROPIONIC"),
   9.414 +('c', True, "CYR", "N~5~-[{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}(IMINIO)METHYL]-L-ORNITHINATE"),
   9.415 +('c', True, "CZ2", "S-(DIHYDROXYARSINO)CYSTEINE"),
   9.416 +('c', True, "CZZ", "THIARSAHYDROXY-CYSTEINE"),
   9.417 +('c', True, "DCY", "D-CYSTEINE"),
   9.418 +('c', True, "DYS", "S-[5-(2-AMINOETHYL)-2,3-DIHYDROXYPHENYL]-L-CYSTEINE"),
   9.419 +('c', True, "EFC", "S,S-(2-FLUOROETHYL)THIOCYSTEINE"),
   9.420 +('c', True, "FOE", "2-(2-AMINO-3-OXO-PROPYLSULFANYL)-N-(4-FLUORO-PHENYL)-N-ISOPROPYL-ACETAMIDE"),
   9.421 +('c', True, "GT9", "S-NONYL-CYSTEINE"),
   9.422 +('c', True, "HTI", "(4S)-4-{[(2S)-2-AMINO-3-OXOPROPYL]SULFANYL}-L-HOMOSERINE"),
   9.423 +('c', True, "K1R", "(2S)-2-AMINO-4-[({[(2R)-2-AMINO-2-CARBOXYETHYL]THIO}AMINO)SULFINYL]BUTANOIC"),
   9.424 +('c', True, "M0H", "S-(HYDROXYMETHYL)-L-CYSTEINE"),
   9.425 +('c', True, "MCS", "MALONYL"),
   9.426 +('c', True, "NPH", "CYSTEINE-METHYLENE-CARBAMOYL-1,10-PHENANTHROLINE"),
   9.427 +('c', True, "NYS", "S-{5-[(1R)-2-AMINO-1-HYDROXYETHYL]-2,3-DIHYDROXYPHENYL}-L-CYSTEINE"),
   9.428 +('c', True, "OCS", "CYSTEINESULFONIC"),
   9.429 +('c', True, "OCY", "HYDROXYETHYLCYSTEINE"),
   9.430 +('c', True, "P1L", "S-PALMITOYL-L-CYSTEINE"),
   9.431 +('c', True, "PBB", "S-(4-BROMOBENZYL)CYSTEINE"),
   9.432 +('c', True, "PEC", "S,S-PENTYLTHIOCYSTEINE"),
   9.433 +('c', True, "PR3", "S,S-PROPYLTHIOCYSTEINE"),
   9.434 +('c', True, "PYX", "S-[S-THIOPYRIDOXAMINYL]CYSTEINE"),
   9.435 +('c', True, "R1A", "3-{[(2,2,5,5-TETRAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-D-ALANINE"),
   9.436 +('c', True, "R1B", "3-{[(2,2,4,5,5-PENTAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-L-ALANINE"),
   9.437 +('c', True, "R1F", "3-{[(2,2,5,5-TETRAMETHYL-1-OXO-4-PHENYL-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-D-ALANINE"),
   9.438 +('c', True, "R7A", "3-S-[(4-BROMO-2,2,5,5-TETRAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROL-3-YL)METHYL]SULFANYL-L-CYSTEINE"),
   9.439 +('c', True, "RCY", "S-[(3S,3'R)-1'-HYDROXY-2',2',5',5'-TETRAMETHYL-2,5-DIOXO-1,3'-BIPYRROLIDIN-3-YL]-L-CYSTEINE"),
   9.440 +('c', True, "SAH", "S-ADENOSYL-L-HOMOCYSTEINE"),
   9.441 +('c', True, "SCH", "S-METHYL-THIO-CYSTEINE"),
   9.442 +('c', True, "SCS", "3-(ETHYLDISULFANYL)-L-ALANINE"),
   9.443 +('c', True, "SCY", "S-ACETYL-CYSTEINE"),
   9.444 +('c', True, "SHC", "S-HEXYLCYSTEINE"),
   9.445 +('c', True, "SIB", "(2S)-2-AMINO-4-({[(2S,3S,4R,5R)-3,4-DIHYDROXY-5-(6-OXO-1,6-DIHYDRO-9H-PURIN-9-YL)TETRAHYDROFURAN-2-YL]METHYL}THIO)BUTANOIC"),
   9.446 +('c', True, "SMC", "S-METHYLCYSTEINE"),
   9.447 +('c', True, "SNC", "S-NITROSO-CYSTEINE"),
   9.448 +('c', True, "SOC", "DIOXYSELENOCYSTEINE"),
   9.449 +('c', True, "SYS", "3-[(2-AMINO-2-OXOETHYL)SELANYL]-L-ALANINE"),
   9.450 +('c', True, "TNB", "S-(2,3,6-TRINITROPHENYL)CYSTEINE"),
   9.451 +('c', True, "YCM", "S-(2-AMINO-2-OXOETHYL)-L-CYSTEINE"),
   9.452 +('d', True, "0A0", "2-METHYL-L-ASPARTIC"),
   9.453 +('d', True, "0AK", "(2S)-2-AMINO-4-(2-CHLOROETHOXY)-4-OXOBUTANOIC"),
   9.454 +('d', True, "3MD", "2S,3S-3-METHYLASPARTIC"),
   9.455 +('d', True, "ACB", "3-METHYL-ASPARTIC"),
   9.456 +('d', True, "AEI", "THREONINE-ASPARTIC"),
   9.457 +('d', True, "AKL", "3-AMINO-5-CHLORO-4-OXOPENTANOIC"),
   9.458 +('d', True, "AS2", "(2R)-2-AMINO-4-OXOBUTANOIC"),
   9.459 +('d', True, "ASA", "ASPARTIC"),
   9.460 +('d', True, "ASB", "ASPARTIC"),
   9.461 +('d', True, "ASI", "L-ISO-ASPARTATE"),
   9.462 +('d', True, "ASK", "DEHYDROXYMETHYLASPARTIC"),
   9.463 +('d', True, "ASL", "ASPARTIC"),
   9.464 +('d', True, "ASQ", "PHOSPHOASPARTATE"),
   9.465 +('d', True, "B3D", "3-AMINOPENTANEDIOIC"),
   9.466 +('d', True, "BFD", "ASPARTATE"),
   9.467 +('d', True, "BHD", "BETA-HYDROXYASPARTIC"),
   9.468 +('d', True, "DAS", "D-ASPARTIC"),
   9.469 +('d', True, "DMK", "3,3-DIMETHYL"),
   9.470 +('d', True, "DOH", "BETA-HYDROXY"),
   9.471 +('d', True, "DSP", "D-ASPARTIC"),
   9.472 +('d', True, "IAS", "BETA-ASPARTYL"),
   9.473 +('d', True, "LAA", "(3R)-3-HYDROXY-L-ALPHA-ASPARAGINE"),
   9.474 +('d', True, "OHS", "O-(CARBOXYSULFANYL)-4-OXO-L-HOMOSERINE"),
   9.475 +('d', True, "OXX", "OXALYL-ASPARTYL"),
   9.476 +('d', True, "PAS", "2-AMINO-4-OXO-4-PHOSPHONOOXY-BUTYRIC"),
   9.477 +('d', True, "PHD", "ASPARTYL"),
   9.478 +('d', True, "TAV", "N-METHYL-N-{2-[(2-NAPHTHYLSULFONYL)AMINO]-5-[(2-NAPHTHYLSULFONYL)OXY]BENZOYL}-L-ASPARTIC"),
   9.479 +('e', True, "5HP", "PYROGLUTAMIC"),
   9.480 +('e', True, "AR4", "2-AMINO-5-(3-FLUORO-3,4-DIHYDROXY-5-HYDROXYMETHYL-TETRAHYDRO-FURAN-2-YLOXY)-5-HYDROXY-PENTANOIC"),
   9.481 +('e', True, "B3E", "(3S)-3-AMINOHEXANEDIOIC"),
   9.482 +('e', True, "CGA", "CARBOXYMETHYLATED"),
   9.483 +('e', True, "CGU", "GAMMA-CARBOXY-GLUTAMIC"),
   9.484 +('e', True, "CRU", "4-[(4Z)-1-(CARBOXYMETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-2-YL]-4-IMINOBUTANOIC"),
   9.485 +('e', True, "DGL", "D-GLUTAMIC"),
   9.486 +('e', True, "GAU", "(4S)-4-AMINO-5-HYDROXYPENTANOIC"),
   9.487 +('e', True, "GGL", "GAMMA-GLUTAMIC"),
   9.488 +('e', True, "GLQ", "4-AMINO-5-OXO-PENTANOIC"),
   9.489 +('e', True, "GMA", "4-AMIDO-4-CARBAMOYL-BUTYRIC"),
   9.490 +('e', True, "GSU", "O5'-(L-GLUTAMYL-SULFAMOYL)-ADENOSINE"),
   9.491 +('e', True, "ILG", "GLUTAMYL"),
   9.492 +('e', True, "LME", "(3R)-3-METHYL-L-GLUTAMIC"),
   9.493 +('e', True, "MEG", "(2S,3R)-3-METHYL-GLUTAMIC"),
   9.494 +('e', True, "NHL", "(4S)-4-(2-NAPHTHYLMETHYL)-D-GLUTAMIC"),
   9.495 +('e', True, "PCA", "PYROGLUTAMIC"),
   9.496 +('f', True, "0A9", "METHYL"),
   9.497 +('f', True, "1PA", "PHENYLMETHYLACETIC"),
   9.498 +('f', True, "200", "4-CHLORO-L-PHENYLALANINE"),
   9.499 +('f', True, "23F", "(2Z)-2-AMINO-3-PHENYLACRYLIC"),
   9.500 +('f', True, "4PH", "4-METHYL-L-PHENYLALANINE"),
   9.501 +('f', True, "B1F", "PHENYLALANINE"),
   9.502 +('f', True, "B2F", "PHENYLALANINE"),
   9.503 +('f', True, "BIF", "(R)-2-AMINO-3-(4-PHENYLCYCLOHEXYL)PROPANOIC"),
   9.504 +('f', True, "DAH", "3,4-DIHYDROXYPHENYLALANINE"),
   9.505 +('f', True, "DPH", "DEAMINO-METHYL-PHENYLALANINE"),
   9.506 +('f', True, "DPN", "D-PHENYLALANINE"),
   9.507 +('f', True, "EHP", "3-HYDROXYPHENYLALANINE"),
   9.508 +('f', True, "FCL", "3-CHLORO-L-PHENYLALANINE"),
   9.509 +('f', True, "FOG", "PHENYLALANINOYL-[1-HYDROXY]-2-PROPYLENE"),
   9.510 +('f', True, "FPA", "1,1"),
   9.511 +('f', True, "HPC", "3-AMINO-4-PHENYL-BUTAN-2-ONE"),
   9.512 +('f', True, "HPE", "HOMOPHENYLALANINE"),
   9.513 +('f', True, "HPQ", "HOMOPHENYLALANINYLMETHANE"),
   9.514 +('f', True, "IOY", "P-IODO-D-PHENYLALANINE"),
   9.515 +('f', True, "MEA", "N-METHYLPHENYLALANINE"),
   9.516 +('f', True, "NDF", "N-(CARBOXYCARBONYL)-D-PHENYLALANINE"),
   9.517 +('f', True, "NFA", "PHENYLALANINE"),
   9.518 +('f', True, "PBF", "PARA-(BENZOYL)-PHENYLALANINE"),
   9.519 +('f', True, "PCS", "PHENYLALANYLMETHYLCHLORIDE"),
   9.520 +('f', True, "PF5", "2,3,4,5,6-PENTAFLUORO-L-PHENYLALANINE"),
   9.521 +('f', True, "PFF", "4-FLUORO-L-PHENYLALANINE"),
   9.522 +('f', True, "PHA", "PHENYLALANINAL"),
   9.523 +('f', True, "PHI", "IODO-PHENYLALANINE"),
   9.524 +('f', True, "PHL", "L-PHENYLALANINOL"),
   9.525 +('f', True, "PHM", "PHENYLALANYLMETHANE"),
   9.526 +('f', True, "PM3", "2-AMINO-3-(4-PHOSPHONOMETHYL-PHENYL)-PROPIONIC"),
   9.527 +('f', True, "PPN", "PARA-NITROPHENYLALANINE"),
   9.528 +('f', True, "PSA", "3-HYDROXY-4-AMINO-5-PHENYLPENTANOIC"),
   9.529 +('f', True, "SMF", "4-SULFOMETHYL-L-PHENYLALANINE"),
   9.530 +('f', True, "T11", "4-[3-(TRIFLUOROMETHYL)DIAZIRIDIN-3-YL]-L-PHENYLALANINE"),
   9.531 +('f', True, "TFQ", "4-(2,2,2-TRIFLUOROETHYL)-L-PHENYLALANINE"),
   9.532 +('g', True, "0AC", "(4S,5S)-5-AMINO-4-HYDROXY-6-PHENYLHEXANOIC"),
   9.533 +('g', True, "2AG", "(2S)-2-AMINOPENT-4-ENOIC"),
   9.534 +('g', True, "CHP", "3-CHLORO-4-HYDROXYPHENYLGLYCINE"),
   9.535 +('g', True, "CR5", "(2R)-2-(AMINOMETHYL)-2,4-DIHYDROXY-5-OXO-3-(2-OXOETHYL)-2,5-DIHYDRO-1H-IMIDAZOL-3-IUM"),
   9.536 +('g', True, "CSI", "AMINO-(2-IMINO-HEXAHYDRO-PYRIMIDIN-4-YL)-ACETIC"),
   9.537 +('g', True, "FGL", "2-AMINOPROPANEDIOIC"),
   9.538 +('g', True, "GHP", "4-HYDROXYPHENYLGLYCINE"),
   9.539 +('g', True, "GL3", "THIOGLYCIN"),
   9.540 +('g', True, "GLZ", "AMINO-ACETALDEHYDE"),
   9.541 +('g', True, "GSC", "2-ETHYLTHIO"),
   9.542 +('g', True, "IGL", "ALPHA-AMINO-2-INDANACETIC"),
   9.543 +('g', True, "IPG", "N-ISOPROPYL"),
   9.544 +('g', True, "LPG", "L-PROPARGYLGLYCINE"),
   9.545 +('g', True, "LVG", "L-VINYLGLYCINE"),
   9.546 +('g', True, "MEU", "O-METHYL-GLYCINE"),
   9.547 +('g', True, "MGY", "N-METHYLGLYCINE"),
   9.548 +('g', True, "MPQ", "N-METHYL-ALPHA-PHENYL-GLYCINE"),
   9.549 +('g', True, "MSA", "(2-S-METHYL)"),
   9.550 +('g', True, "NMC", "N-CYCLOPROPYLMETHYL"),
   9.551 +('g', True, "PG9", "D-PHENYLGLYCINE"),
   9.552 +('g', True, "PGY", "PHENYLGLYCINE"),
   9.553 +('g', True, "SAR", "SARCOSINE"),
   9.554 +('g', True, "SHP", "(4-HYDROXYMALTOSEPHENYL)GLYCINE"),
   9.555 +('g', True, "TBG", "T-BUTYL"),
   9.556 +('h', True, "3AH", "[HISTIDIN-1-YL-4H-[1,2,4]TRIAZOL-5-YL]-AMINE"),
   9.557 +('h', True, "DDE", "{3-[4-(2-AMINO-2-CARBOXY-ETHYL)-1H-IMIDAZOL-2-YL]-1-CARBAMOYL-PROPYL}-TRIMETHYL-AMMONIUM"),
   9.558 +('h', True, "DHI", "D-HISTIDINE"),
   9.559 +('h', True, "HBN", "N-(2-NAPHTHYL)HISTIDINAMIDE"),
   9.560 +('h', True, "HIA", "L-HISTIDINE"),
   9.561 +('h', True, "HIC", "4-METHYL-HISTIDINE"),
   9.562 +('h', True, "HIP", "ND1-PHOSPHONOHISTIDINE"),
   9.563 +('h', True, "HIQ", "1-[1,2-DIHYDROXY-1-(HYDROXYMETHYL)ETHYL]-L-HISTIDINE"),
   9.564 +('h', True, "HS8", "3-(1-SULFO-1H-IMIDAZOL-3-IUM-4-YL)-L-ALANINE"),
   9.565 +('h', True, "HSO", "HISTIDINOL"),
   9.566 +('h', True, "MHS", "N1-METHYLATED"),
   9.567 +('h', True, "NEM", "NE2-METHYLATED"),
   9.568 +('h', True, "NEP", "N1-PHOSPHONOHISTIDINE"),
   9.569 +('h', True, "NZH", "(2S)-2-AMINO-3-[1-(1H-TETRAAZOL-5-YL)-1H-IMIDAZOL-4-YL]PROPANAL"),
   9.570 +('h', True, "OHI", "3-(2-OXO-2H-IMIDAZOL-4-YL)-L-ALANINE"),
   9.571 +('h', True, "PSH", "1-THIOPHOSPHONO-L-HISTIDINE"),
   9.572 +('h', True, "PVH", "HISTIDINE-METHYL-ESTER"),
   9.573 +('i', True, "B2I", "ISOLEUCINE"),
   9.574 +('i', True, "BIU", "5-BROMO-L-ISOLEUCINE"),
   9.575 +('i', True, "DIL", "D-ISOLEUCINE"),
   9.576 +('i', True, "IIL", "ISO-ISOLEUCINE"),
   9.577 +('i', True, "ILX", "4,5-DIHYDROXYISOLEUCINE"),
   9.578 +('i', True, "IML", "N-METHYL-ISOLEUCINE"),
   9.579 +('k', True, "0A2", "[(1R)-1,5-DIAMINOPENTYL][BIS(ETHANOLATO)]HYDROXYBORATE(1-)"),
   9.580 +('k', True, "6CL", "6-CARBOXYLYSINE"),
   9.581 +('k', True, "ALY", "N(6)-ACETYLLYSINE"),
   9.582 +('k', True, "API", "2,6-DIAMINOPIMELIC"),
   9.583 +('k', True, "APK", "5'-O-[(S)-{[(5S)-5-AMINO-6-OXOHEXYL]AMINO}(HYDROXY)PHOSPHORYL]ADENOSINE"),
   9.584 +('k', True, "AZK", "(S)-2-AMINO-6-AZIDOHEXANOIC"),
   9.585 +('k', True, "B3K", "(3S)-3,7-DIAMINOHEPTANOIC"),
   9.586 +('k', True, "BLY", "LYSINE"),
   9.587 +('k', True, "C1X", "(Z)-N~6~-[(4R,5S)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)DIHYDRO-2H-THIOPYRAN-3(4H)-YLIDENE]-L-LYSINE"),
   9.588 +('k', True, "CCL", "N~6~-[(CYCLOPENTYLOXY)CARBONYL]-D-LYSINE"),
   9.589 +('k', True, "CLG", "2-AMINO-6-[2-(2-AMINOOXY-ACETYLAMINO)-ACETYLAMINO]-HEXANOIC"),
   9.590 +('k', True, "CLH", "2-AMINO-6-[2-(2-OXO-ACETYLAMINO)-ACETYLAMINO]-HEXANOIC"),
   9.591 +('k', True, "DLS", "DI-ACETYL-LYSINE"),
   9.592 +('k', True, "DLY", "D-LYSINE"),
   9.593 +('k', True, "DM0", "N~2~,N~2~,N~6~,N~6~-TETRAMETHYL-L-LYSINE"),
   9.594 +('k', True, "DNL", "6-AMINO-HEXANAL"),
   9.595 +('k', True, "DNS", "N~6~-{[5-(DIMETHYLAMINO)-1-NAPHTHYL]SULFONYL}-L-LYSINE"),
   9.596 +('k', True, "FZN", "(2S)-2-amino-6-{[(1Z)-1-{[(2R,3R,4S,5R)-5-({[(R)-{[(R)-{[(2R,3S,4R,5R)-5-(6-amino-9H-purin-9-yl)-3,4-dihydroxytetrahydrofuran-2-yl]methoxy}(hydroxy)phosphoryl]oxy}(hydroxy)phosphoryl]oxy}methyl)-3,4-dihydroxytetrahydrofuran-2-yl]sulfanyl}ethylidene]amino}hexanoic acid"),
   9.597 +('k', True, "GPL", "LYSINE GUANOSINE-5'-MONOPHOSPHATE"),
   9.598 +('k', True, "I58", "4R-FLUORO-N6-ETHANIMIDOYL-L-LYSINE"),
   9.599 +('k', True, "IEL", "N~6~-[(1Z)-ETHANIMIDOYL]-L-LYSINE"),
   9.600 +('k', True, "IT1", "(E)-N~6~-({3-HYDROXY-2-METHYL-5-[(PHOSPHONOOXY)METHYL]PYRIDIN-4-YL}METHYLIDENE)-L-LYSINE"),
   9.601 +('k', True, "KCX", "LYSINE"),
   9.602 +('k', True, "KGC", "N~6~-[(2R)-2-CARBOXY-5-OXOTETRAHYDROFURAN-2-YL]-L-LYSINE"),
   9.603 +('k', True, "KPI", "(2S)-2-AMINO-6-[(1-HYDROXY-1-OXO-PROPAN-2-YLIDENE)AMINO]HEXANOIC"),
   9.604 +('k', True, "KST", "N~6~-(5-CARBOXY-3-THIENYL)-L-LYSINE"),
   9.605 +('k', True, "KYQ", "(E)-N~6~-(1-CARBOXY-2-HYDROXYETHYLIDENE)-L-LYSINE"),
   9.606 +('k', True, "LA2", "N~6~-[(6R)-6,8-DISULFANYLOCTANOYL]-L-LYSINE"),
   9.607 +('k', True, "LCK", "(Z)-N~6~-(2-CARBOXY-1-METHYLETHYLIDENE)-L-LYSINE"),
   9.608 +('k', True, "LCX", "CARBOXYLATED"),
   9.609 +('k', True, "LDH", "N~6~-ETHYL-L-LYSINE"),
   9.610 +('k', True, "LLP", "2-LYSINE(3-HYDROXY-2-METHYL-5-PHOSPHONOOXYMETHYL-PYRIDIN-4-YLMETHANE)"),
   9.611 +('k', True, "LLY", "NZ-(DICARBOXYMETHYL)LYSINE"),
   9.612 +('k', True, "LYM", "DEOXY-METHYL-LYSINE"),
   9.613 +('k', True, "LYN", "2,6-DIAMINO-HEXANOIC"),
   9.614 +('k', True, "LYR", "N~6~-[(2Z,4E,6E,8E)-3,7-DIMETHYL-9-(2,6,6-TRIMETHYLCYCLOHEX-1-EN-1-YL)NONA-2,4,6,8-TETRAENYL]LYSINE"),
   9.615 +('k', True, "LYX", "N''-(2-COENZYME A)-PROPANOYL-LYSINE"),
   9.616 +('k', True, "LYZ", "5-HYDROXYLYSINE"),
   9.617 +('k', True, "M2L", "(2R)-2-AMINO-3-(2-DIMETHYLAMINOETHYLSULFANYL)PROPANOIC"),
   9.618 +('k', True, "M3L", "N-TRIMETHYLLYSINE"),
   9.619 +('k', True, "MCL", "NZ-(1-CARBOXYETHYL)-LYSINE"),
   9.620 +('k', True, "ML3", "2-{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}-N,N,N-TRIMETHYLETHANAMINIUM"),
   9.621 +('k', True, "MLY", "N-DIMETHYL-LYSINE"),
   9.622 +('k', True, "MLZ", "N-METHYL-LYSINE"),
   9.623 +('k', True, "SHR", "N-(5-AMINO-5-CARBOXYPENTYL)GLUTAMIC"),
   9.624 +('k', True, "SLZ", "L-THIALYSINE"),
   9.625 +('k', True, "TRG", "L-(N,N)"),
   9.626 +('k', True, "VB1", "N^6^-[(1R)-2-{[(1S)-1-CARBOXYPROPYL]AMINO}-2-OXO-1-(SULFANYLMETHYL)ETHYL]-6-OXO-L-LYSINE"),
   9.627 +('k', True, "XX1", "N~6~-7H-PURIN-6-YL-L-LYSINE"),
   9.628 +('l', True, "0AG", "N-(ETHOXYCARBONYL)-L-LEUCINE"),
   9.629 +('l', True, "1LU", "4-METHYL-PENTANOIC"),
   9.630 +('l', True, "2LU", "2-AMINO-4-METHYL-PENTANYL"),
   9.631 +('l', True, "2ML", "2-METHYLLEUCINE"),
   9.632 +('l', True, "BLE", "LEUCINE"),
   9.633 +('l', True, "BTA", "4-DEMETHYL-LEUCINE"),
   9.634 +('l', True, "BUG", "TERT-LEUCYL"),
   9.635 +('l', True, "CLE", "LEUCINE"),
   9.636 +('l', True, "DLE", "D-LEUCINE"),
   9.637 +('l', True, "DNE", "D-NORLEUCINE"),
   9.638 +('l', True, "DNG", "N-FORMYL-D-NORLEUCINE"),
   9.639 +('l', True, "DNM", "N-METHYL-D-NORLEUCINE"),
   9.640 +('l', True, "DON", "6-DIAZENYL-5-OXO-L-NORLEUCINE"),
   9.641 +('l', True, "EXY", "6-[(2R)-OXIRAN-2-YL]-L-NORLEUCINE"),
   9.642 +('l', True, "FLE", "FUROYL-LEUCINE"),
   9.643 +('l', True, "HLU", "BETA-HYDROXYLEUCINE"),
   9.644 +('l', True, "LED", "(4R)-5-OXO-L-LEUCINE"),
   9.645 +('l', True, "LEF", "(4S)-5-FLUORO-L-LEUCINE"),
   9.646 +('l', True, "LEH", "N-[12-(1H-IMIDAZOL-1-YL)DODECANOYL]-L-LEUCINE"),
   9.647 +('l', True, "MHL", "N-METHYL-4-HYDROXY-LEUCINE"),
   9.648 +('l', True, "MLE", "N-METHYLLEUCINE"),
   9.649 +('l', True, "MLL", "METHYL"),
   9.650 +('l', True, "MNL", "4,N-DIMETHYLNORLEUCINE"),
   9.651 +('l', True, "NLE", "NORLEUCINE"),
   9.652 +('l', True, "NLN", "NORLEUCINE"),
   9.653 +('l', True, "NLO", "O-METHYL-L-NORLEUCINE"),
   9.654 +('l', True, "NLP", "(1-AMINO-PENTYL)-PHOSPHONIC"),
   9.655 +('l', True, "PLE", "LEUCINE"),
   9.656 +('l', True, "PPH", "PHENYLALANINE"),
   9.657 +('m', True, "2FM", "S-(DIFLUOROMETHYL)HOMOCYSTEINE"),
   9.658 +('m', True, "CXM", "N-CARBOXYMETHIONINE"),
   9.659 +('m', True, "ESC", "2-AMINO-4-ETHYL"),
   9.660 +('m', True, "FME", "N-FORMYLMETHIONINE"),
   9.661 +('m', True, "KOR", "L-HOMOCYSTEINE-S-N-S-L-CYSTEINE"),
   9.662 +('m', True, "MED", "D-METHIONINE"),
   9.663 +('m', True, "MHO", "S-OXYMETHIONINE"),
   9.664 +('m', True, "MME", "N-METHYL"),
   9.665 +('m', True, "MSE", "SELENOMETHIONINE"),
   9.666 +('m', True, "MSL", "(2S)-2-AMINO-4-(METHYLSULFONIMIDOYL)BUTANOIC"),
   9.667 +('m', True, "MSO", "SELENOMETHIONINE"),
   9.668 +('m', True, "MT2", "[(3S)-3-AMINO-3-CARBOXYPROPYL](ETHYL)METHYLSULFONIUM"),
   9.669 +('m', True, "OMT", "S-DIOXYMETHIONINE"),
   9.670 +('m', True, "SME", "METHIONINE"),
   9.671 +('n', True, "0A5", "N~2~-PROPANOYL-L-ASPARAGINE"),
   9.672 +('n', True, "AFA", "N-[7-METHYL-OCT-2,4-DIENOYL]ASPARAGINE"),
   9.673 +('n', True, "AHB", "BETA-HYDROXYASPARAGINE"),
   9.674 +('n', True, "B3X", "(3S)-3,5-DIAMINO-5-OXOPENTANOIC"),
   9.675 +('n', True, "DMH", "N4,N4-DIMETHYL-ASPARAGINE"),
   9.676 +('n', True, "DSG", "D-ASPARAGINE"),
   9.677 +('n', True, "MEN", "N-METHYL"),
   9.678 +('n', True, "SNN", "L-3-AMINOSUCCINIMIDE"),
   9.679 +('p', True, "0AZ", "(4R)-4-HYDROXY-L-PROLINE"),
   9.680 +('p', True, "2MT", "(4R)-2,2-DIMETHYL-1,3-THIAZOLIDINE-4-CARBOXYLIC"),
   9.681 +('p', True, "4FB", "(4S)-4-FLUORO-L-PROLINE"),
   9.682 +('p', True, "DPL", "4-OXOPROLINE"),
   9.683 +('p', True, "DPR", "D-PROLINE"),
   9.684 +('p', True, "H5M", "TRANS-3-HYDROXY-5-METHYLPROLINE"),
   9.685 +('p', True, "HY3", "3-HYDROXYPROLINE"),
   9.686 +('p', True, "HYP", "4-HYDROXYPROLINE"),
   9.687 +('p', True, "LPD", "L-PROLINAMIDE"),
   9.688 +('p', True, "N7P", "1-ACETYL-D-PROLINE"),
   9.689 +('p', True, "P2Y", "(2S)-PYRROLIDIN-2-YLMETHYLAMINE"),
   9.690 +('p', True, "PCC", "5-OXOPROLINE"),
   9.691 +('p', True, "POM", "CIS-5-METHYL-4-OXOPROLINE"),
   9.692 +('p', True, "PRS", "THIOPROLINE"),
   9.693 +('p', True, "SLA", "(3S,4R)-3-HYDROXY-2-[(1S)-1-HYDROXY-2-METHYLPROPYL]-4-METHYL-5-OXO-D-PROLINE"),
   9.694 +('p', True, "SLR", "(3R,4R)-3-HYDROXY-2-[(1S)-1-HYDROXY-2-METHYLPROPYL]-4-METHYL-5-OXO-D-PROLINE"),
   9.695 +('q', True, "DGN", "D-GLUTAMINE"),
   9.696 +('q', True, "GHG", "GAMMA-HYDROXY-GLUTAMINE"),
   9.697 +('q', True, "GLH", "N-5-CYCLOHEXYL-N-5-[(CYCLOHEXYLAMINO)CARBONYL]GLUTAMINE"),
   9.698 +('q', True, "MEQ", "N5-METHYLGLUTAMINE"),
   9.699 +('q', True, "MGN", "2-METHYL-GLUTAMINE"),
   9.700 +('q', True, "NLQ", "N~2~-ACETYL-L-GLUTAMINE"),
   9.701 +('r', True, "2MR", "N3,"),
   9.702 +('r', True, "AAR", "ARGININEAMIDE"),
   9.703 +('r', True, "ACL", "DEOXY-CHLOROMETHYL-ARGININE"),
   9.704 +('r', True, "AGM", "5-METHYL-ARGININE"),
   9.705 +('r', True, "ALG", "GUANIDINOBUTYRYL"),
   9.706 +('r', True, "ARM", "DEOXY-METHYL-ARGININE"),
   9.707 +('r', True, "ARO", "C-GAMMA-HYDROXY"),
   9.708 +('r', True, "BOR", "(1R)-1-AMINO-4-{[(E)-AMINO(IMINO)METHYL]AMINO}BUTYLBORONIC"),
   9.709 +('r', True, "CIR", "CITRULLINE"),
   9.710 +('r', True, "DAR", "D-ARGININE"),
   9.711 +('r', True, "DIR", "3-{[(E)-AMINO(HYDROXYIMINO)METHYL]AMINO}ALANINE"),
   9.712 +('r', True, "HAR", "N-OMEGA-HYDROXY-L-ARGININE"),
   9.713 +('r', True, "HMR", "BETA-HOMOARGININE"),
   9.714 +('r', True, "HRG", "L-HOMOARGININE"),
   9.715 +('r', True, "MAI", "DEOXO-METHYLARGININE"),
   9.716 +('r', True, "MGG", "2-(2-CARBOXY-ACETYLAMINO)-5-GUANIDINO-PENTANOIC"),
   9.717 +('r', True, "NMM", "(R)-2-AMINO-5-(3-METHYLGUANIDINO)BUTANOIC"),
   9.718 +('r', True, "NNH", "NOR-N-OMEGA-HYDROXY-L-ARGININE"),
   9.719 +('r', True, "OPR", "C-(3-OXOPROPYL)ARGININE"),
   9.720 +('r', True, "ORQ", "N~5~-ACETYL-L-ORNITHINE"),
   9.721 +('s', True, "0AH", "O-(BROMOACETYL)-L-SERINE"),
   9.722 +('s', True, "AZS", "O-DIAZOACETYL-L-SERINE"),
   9.723 +('s', True, "B3S", "(3R)-3-AMINO-4-HYDROXYBUTANOIC"),
   9.724 +('s', True, "BG1", "O-[(2S)-2-{METHYL[(METHYLAMINO)SULFONYL]AMINO}PENTANOYL]-L-SERINE"),
   9.725 +('s', True, "BSE", "BETA-3-SERINE"),
   9.726 +('s', True, "CWR", "(4-METHYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL)ACETIC"),
   9.727 +('s', True, "DBS", "2-(2,3-DIHYDROXY-BENZOYLAMINO)-3-HYDROXY-PROPIONIC"),
   9.728 +('s', True, "DSE", "N-METHYL-D-SERINE"),
   9.729 +('s', True, "DSN", "D-SERINE"),
   9.730 +('s', True, "FGP", "2-AMINO-3-HYDROXY-3-PHOSPHONOOXY-PROPIONIC"),
   9.731 +('s', True, "HSE", "L-HOMOSERINE"),
   9.732 +('s', True, "HSL", "HOMOSERINE"),
   9.733 +('s', True, "LPS", "O-{HYDROXY[((2R)-2-HYDROXY-3-{[(1S)-1-HYDROXYPENTADECYL]OXY}PROPYL)OXY]PHOSPHORYL}-L-SERINE"),
   9.734 +('s', True, "MC1", "METHICILLIN"),
   9.735 +('s', True, "MIS", "MONOISOPROPYLPHOSPHORYLSERINE"),
   9.736 +('s', True, "N10", "O-[(HEXYLAMINO)CARBONYL]-L-SERINE"),
   9.737 +('s', True, "NC1", "NITROCEFIN"),
   9.738 +('s', True, "OAS", "O-ACETYLSERINE"),
   9.739 +('s', True, "OLZ", "O-(2-AMINOETHYL)-L-SERINE"),
   9.740 +('s', True, "OSE", "O-SULFO-L-SERINE"),
   9.741 +('s', True, "PG1", "PENICILLIN"),
   9.742 +('s', True, "S1H", "1-HEXADECANOSULFONYL-O-L-SERINE"),
   9.743 +('s', True, "SAC", "N-ACETYL-SERINE"),
   9.744 +('s', True, "SBD", "D-NAPHTHYL-1-ACETAMIDO"),
   9.745 +('s', True, "SBL", "L-NAPHTHYL-1-ACETAMIDO"),
   9.746 +('s', True, "SDP", "2-AMINO-3-(DIETHOXY-PHOSPHORYLOXY)-PROPIONIC"),
   9.747 +('s', True, "SEB", "O-BENZYLSULFONYL-SERINE"),
   9.748 +('s', True, "SEL", "2-AMINO-1,3-PROPANEDIOL"),
   9.749 +('s', True, "SEP", "PHOSPHOSERINE"),
   9.750 +('s', True, "SET", "AMINOSERINE"),
   9.751 +('s', True, "SGB", "O-[(S)-METHYL(1-METHYLETHOXY)PHOSPHORYL]-L-SERINE"),
   9.752 +('s', True, "SOY", "OXACILLIN-ACYLATED"),
   9.753 +('s', True, "SUN", "O-[(R)-(DIMETHYLAMINO)(ETHOXY)PHOSPHORYL]-L-SERINE"),
   9.754 +('s', True, "SVA", "SERINE"),
   9.755 +('s', True, "TNR", "O-(2-ACETAMIDO-2-DEOXY-ALPHA-D-GALACTOPYRANOSYL)-L-SERINE"),
   9.756 +('t', True, "ALO", "ALLO-THREONINE"),
   9.757 +('t', True, "BMT", "4-METHYL-4-[(E)-2-BUTENYL]-4,N-METHYL-THREONINE"),
   9.758 +('t', True, "CTH", "4-CHLOROTHREONINE"),
   9.759 +('t', True, "D11", "D-PHOSHPHOTHREONINE"),
   9.760 +('t', True, "DTH", "D-THREONINE"),
   9.761 +('t', True, "IYT", "N-ALPHA-ACETYL-3,5-DIIODOTYROSYL-D-THREONINE"),
   9.762 +('t', True, "OLT", "O-METHYL-L-THREONINE"),
   9.763 +('t', True, "TBM", "4-METHYL-4-[(E)-2-BUTENYL]-4,N-DIMETHYL-THREONINE"),
   9.764 +('t', True, "THC", "N-METHYLCARBONYLTHREONINE"),
   9.765 +('t', True, "TMB", "N-METHYL-4-[(E)-2-BUTENYL]-4,N-DIMETHYL-THREONINE"),
   9.766 +('t', True, "TMD", "(6,7-DIHYDRO)4-[(E)-BUTENYL]-4,N-DIMETHYL-THREONINE"),
   9.767 +('t', True, "TPO", "PHOSPHOTHREONINE"),
   9.768 +('v', True, "0AA", "METHYL"),
   9.769 +('v', True, "0AB", "(3S,4S)-3-AMINO-4-METHYL-3,4-DIHYDRO-2H-PYRAN-2-ONE"),
   9.770 +('v', True, "2VA", "2'-(L-VALYL)AMINO-2'-DEOXYADENOSINE"),
   9.771 +('v', True, "B2V", "VALINE"),
   9.772 +('v', True, "DHN", "5-HYDROXY"),
   9.773 +('v', True, "DIV", "D-ISOVALINE"),
   9.774 +('v', True, "DVA", "D-VALINE"),
   9.775 +('v', True, "MNV", "N-METHYL-C-AMINO"),
   9.776 +('v', True, "MVA", "N-METHYLVALINE"),
   9.777 +('v', True, "NVA", "NORVALINE"),
   9.778 +('v', True, "VAD", "DEAMINOHYDROXYVALINE"),
   9.779 +('v', True, "VAF", "METHYLVALINE"),
   9.780 +('w', True, "0AF", "7-HYDROXY-L-TRYPTOPHAN"),
   9.781 +('w', True, "1TQ", "6-(FORMYLAMINO)-7-HYDROXY-L-TRYPTOPHAN"),
   9.782 +('w', True, "4DP", "3-[5-(DIMETHYLAMINO)-1,3-DIOXO-1,3-DIHYDRO-2H-ISOINDOL-2-YL]-L-ALANINE"),
   9.783 +('w', True, "4FW", "4-FLUOROTRYPTOPHANE"),
   9.784 +('w', True, "4HT", "4-HYDROXYTRYPTOPHAN"),
   9.785 +('w', True, "6CW", "6-CHLORO-L-TRYPTOPHAN"),
   9.786 +('w', True, "BTR", "6-BROMO-TRYPTOPHAN"),
   9.787 +('w', True, "DTR", "D-TRYPTOPHAN"),
   9.788 +('w', True, "FT6", "6-FLUORO-L-TRYPTOPHAN"),
   9.789 +('w', True, "FTR", "FLUOROTRYPTOPHANE"),
   9.790 +('w', True, "HRP", "5-HYDROXY-L-TRYPTOPHAN"),
   9.791 +('w', True, "HTR", "BETA-HYDROXYTRYPTOPHANE"),
   9.792 +('w', True, "LTR", "L-TRYPTOPHAN"),
   9.793 +('w', True, "PAT", "ALPHA-PHOSPHONO-TRYPTOPHAN"),
   9.794 +('w', True, "TOX", "2-AMINO-3-(1-HYDROPEROXY-1H-INDOL-3-YL)PROPAN-1-OL"),
   9.795 +('w', True, "TPL", "TRYPTOPHANOL"),
   9.796 +('w', True, "TQQ", "(S)-2-AMINO-3-(6,7-DIHYDRO-6-IMINO-7-OXO-1H-INDOL-3-YL)PROPANOIC"),
   9.797 +('w', True, "TRF", "N1-FORMYL-TRYPTOPHAN"),
   9.798 +('w', True, "TRN", "NZ2-TRYPTOPHAN"),
   9.799 +('w', True, "TRO", "2-HYDROXY-TRYPTOPHAN"),
   9.800 +('w', True, "TRQ", "2-AMINO-3-(6,7-DIOXO-6,7-DIHYDRO-1H-INDOL-3-YL)-PROPIONIC"),
   9.801 +('w', True, "TRW", "TRW3-(2-AMINO-3-HYDROXY-PROPYL)-6-(N'-CYCLOHEXYL-HYDRAZINO)OCTAHYDRO-INDOL-7-OL"),
   9.802 +('w', True, "TRX", "6-HYDROXYTRYPTOPHAN"),
   9.803 +('w', True, "TTQ", "6-AMINO-7-HYDROXY-L-TRYPTOPHAN"),
   9.804 +('x', True, "01W", "(2S)-2-AMMONIO-4-[(2,4-DINITROPHENYL)AMINO]BUTANOATE"),
   9.805 +('x', True, "0AY", "DIETHYL"),
   9.806 +('x', True, "0G6", "D-PHENYLALANYL-N-[(1S)-4-{[AMINO(IMINIO)METHYL]AMINO}-1-(CHLOROACETYL)BUTYL]-L-PROLINAMIDE"),
   9.807 +('x', True, "0Z6", "D-PHENYLALANYL-N-[(1S)-4-{[AMINO(IMINIO)METHYL]AMINO}-1-(CHLOROACETYL)BUTYL]-L-PHENYLALANINAMIDE"),
   9.808 +('x', True, "0ZC", "(3R)-2-[N-(FURAN-2-YLCARBONYL)-L-LEUCYL]-2,3,4,9-TETRAHYDRO-1H-BETA-CARBOLINE-3-CARBOXYLIC"),
   9.809 +('x', True, "0ZE", "AMINO{[(4R)-4-({[(3R,6S,8AS)-6-AMINO-6-BENZYL-5-OXOOCTAHYDROINDOLIZIN-3-YL]CARBONYL}AMINO)-5-(1,3-BENZOTHIAZOL-2-YL)-5-OXOPENTYL]AMINO}METHANIMINIUM"),
   9.810 +('x', True, "0ZJ", "N-(SULFANYLACETYL)-D-PHENYLALANYL-N-[(1S)-4-CARBAMIMIDAMIDO-1-(CHLOROACETYL)BUTYL]-L-PROLINAMIDE"),
   9.811 +('x', True, "0ZM", "(2R)-2-{[(2R)-2-{[(S)-HYDROXY{[(2R,3S,4S,5R,6R)-3,4,5-TRIHYDROXY-6-METHYLTETRAHYDRO-2H-PYRAN-2-YL]OXY}PHOSPHORYL]AMINO}-4-METHYLPENTANOYL]AMINO}-3-(1H-INDOL-3-YL)PROPANOIC"),
   9.812 +('x', True, "175", "3,5-DIHYDRO-5-METHYLIDENE-4H-IMIDAZOL-4-ON"),
   9.813 +('x', True, "193", "(2S)-4-(BETA-ALANYLAMINO)-2-AMINOBUTANOIC"),
   9.814 +('x', True, "1PI", "3-(1-CARBAMIMIDOYL-PIPERIDIN-3-YL)-L-ALANINE"),
   9.815 +('x', True, "1ZX", "D-PHENYLALANYL-N-[(1S)-1-ACETYL-4-{[AMINO(IMINIO)METHYL]AMINO}BUTYL]-L-PROLINAMIDE"),
   9.816 +('x', True, "23S", "(S)-2-AMINO-3-(6H-SELENOLO[2,3-B]-PYRROL-4-YL)-PROPIONIC"),
   9.817 +('x', True, "2AD", "2'-AMINO-2'-DEOXYADENOSINE"),
   9.818 +('x', True, "2AO", "(2S)-2-AMINOHEXAN-1-OL"),
   9.819 +('x', True, "2AS", "(2S,3S)-3-AMINO-2-METHYL-4-OXOBUTANOIC"),
   9.820 +('x', True, "2DO", "(2S)-2-AMINOHEXANE-1,1-DIOL"),
   9.821 +('x', True, "2PI", "2-AMINO-PENTANOIC"),
   9.822 +('x', True, "2SI", "2-O-SULFO-A-L-IDURONIC"),
   9.823 +('x', True, "32S", "(S)-2-AMINO-3-(4H-SELENOLO[3,2-B]-PYRROL-6-YL)-PROPIONIC"),
   9.824 +('x', True, "32T", "(S)-2-AMINO-3-(4H-THIENO[3,2-B]-PYRROL-6-YL)-PROPIONIC"),
   9.825 +('x', True, "3AR", "N-OMEGA-PROPYL-L-ARGININE"),
   9.826 +('x', True, "3MM", "(1R)-1-CARBOXY-N,N,N-TRIMETHYL-3-(METHYLSULFANYL)PROPAN-1-AMINIUM"),
   9.827 +('x', True, "3TY", "3-[(3E)-3-(BENZYLHYDRAZONO)-4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIEN-1-YL]-L-ALANINE"),
   9.828 +('x', True, "4F3", "[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(4-FLUORO-1H-INDOL-3-YLMETHYL)-5-HYDROXY-IMIDAZOL-1-YL]-ACETIC"),
   9.829 +('x', True, "4IN", "4-AMINO-L-TRYPTOPHAN"),
   9.830 +('x', True, "4MM", "(1S)-1-CARBOXY-N,N,N-TRIMETHYL-3-(METHYLSULFANYL)PROPAN-1-AMINIUM"),
   9.831 +('x', True, "5ZA", "(5Z)-2-[(1S,2R)-1-AMINO-2-HYDROXYPROPYL]-5-[(4-AMINO-1H-INDOL-3-YL)METHYLENE]-3-(2-HYDROXYETHYL)-3,5-DIHYDRO-4H-IMIDAZOL-4-ONE"),
   9.832 +('x', True, "A66", "2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-ADENINE"),
   9.833 +('x', True, "AB7", "ALPHA-AMINOBUTYRIC"),
   9.834 +('x', True, "ADD", "2,6,8-TRIMETHYL-3-AMINO-9-BENZYL-9-METHOXYNONANOIC"),
   9.835 +('x', True, "AEA", "(2-AMINO-2-CARBAMOYL-ETHYLSULFANYL)-ACETIC"),
   9.836 +('x', True, "AGT", "S-{(S)-AMINO[(4-AMINOBUTYL)AMINO]METHYL}-L-CYSTEINE"),
   9.837 +('x', True, "AHH", "AMINO-HYDROXYHEPTANOIC"),
   9.838 +('x', True, "AHS", "(3-AMINO-4-CYCLOHEXYL-2-HYDROXY-BUTYL)-ISOBUTYL-CARBAMIC"),
   9.839 +('x', True, "AHT", "4-(2,5-DIAMINO-5-HYDROXY-PENTYL)-PHENOL"),
   9.840 +('x', True, "ALQ", "2-METHYL-PROPIONIC"),
   9.841 +('x', True, "ANI", "4-TRIFLUOROMETHYLANILINE"),
   9.842 +('x', True, "APE", "(1-AMINO-2-PHENYL-ETHYL)-CARBAMIC"),
   9.843 +('x', True, "APM", "M-AMIDINOPHENYL-3-ALANINE"),
   9.844 +('x', True, "APO", "D-2-AMINO-3-PHOSPHONO-PROPIONIC"),
   9.845 +('x', True, "APP", "1-ACETYL-2-CARBOXYPIPERIDINE"),
   9.846 +('x', True, "ARV", "5-N-ALLYL-ARGININE"),
   9.847 +('x', True, "AS9", "N-[HYDROXY(METHYL)PHOSPHORYL]-L-ASPARTIC"),
   9.848 +('x', True, "ASM", "2-AMINO-4-OXO-4(1H-PYRROL-1-YL)BUTANOIC"),
   9.849 +('x', True, "ASX", "ASP/ASN"),
   9.850 +('x', True, "AVN", "(2S)-AMINO[(5S)-3-CHLORO-4,5-DIHYDROISOXAZOL-5-YL]ACETIC"),
   9.851 +('x', True, "AYG", "[(4E)-2-[(1S)-1-AMINOETHYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
   9.852 +('x', True, "B3L", "(3S)-3-AMINO-5-METHYLHEXANOIC"),
   9.853 +('x', True, "B3Q", "(3S)-3,6-DIAMINO-6-OXOHEXANOIC"),
   9.854 +('x', True, "BCC", "6-AMINO-6-BENZYL-5-OXO-OCTAHYDRO-INDOLIZINE-3-CARBALDEHYDE"),
   9.855 +('x', True, "BE2", "2-AMINOBENZOIC"),
   9.856 +('x', True, "BIL", "(3R,4S)-3-AMINO-4-METHYLHEXANOIC"),
   9.857 +('x', True, "BJH", "1(R)-1-ACETAMIDO-2-(3-CARBOXY-2-HYDROXYPHENYL)ETHYL"),
   9.858 +('x', True, "BNO", "NORLEUCINE"),
   9.859 +('x', True, "C12", "2-(1-AMINO-2-HYDROXYPROPYL)-4-(4-HYDROXYBENZYL)-1-(2-OXOETHYL)-1H-IMIDAZOL-5-OLATE"),
   9.860 +('x', True, "C66", "2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-CYTOSINE"),
   9.861 +('x', True, "C99", "{(2R)-2-[(1S,2R)-1-AMINO-2-HYDROXYPROPYL]-2-HYDROXY-4,5-DIOXOIMIDAZOLIDIN-1-YL}ACETIC"),
   9.862 +('x', True, "CAL", "5-AMINO-6-CYCLOHEXYL-4-HYDROXY-2-ISOBUTYL-HEXANOIC"),
   9.863 +('x', True, "CAV", "5-AMINO-6-CYCLOHEXYL-3,4-DIHYDROXY-2-ISOPROPYL-HEXANOIC"),
   9.864 +('x', True, "CCY", "2-(1-AMINO-2-MERCAPTO-ETHYL)-5-(4-HYDROXY-BENZYL)-3-(ETHANOYL)-3,5-DIHYDRO-IMIDAZOL-4-ONE"),
   9.865 +('x', True, "CDE", "1,2-DIMETHYL-PROPYLAMINE"),
   9.866 +('x', True, "CDV", "3-METHYL-2-UREIDO-BUTYRIC"),
   9.867 +('x', True, "CFY", "[(2S)-2-{(2R)-2-[(1S)-1-AMINO-2-PHENYLETHYL]-2-HYDROXY-2,5-DIHYDRO-1,3-THIAZOL-4-YL}-4-(4-HYDROXYBENZYL)-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
   9.868 +('x', True, "CH6", "{(4Z)-2-[(1S)-1-AMINO-3-(METHYLSULFANYL)PROPYL]-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"),
   9.869 +('x', True, "CH7", "[(4Z)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-2-(3,4,5,6-TETRAHYDROPYRIDIN-2-YL)-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
   9.870 +('x', True, "CHF", "CYCLOHEXYLFLUOROSTATONE"),
   9.871 +('x', True, "CHG", "CYCLOHEXYL-GLYCINE"),
   9.872 +('x', True, "CHS", "4-AMINO-5-CYCLOHEXYL-3-HYDROXY-PENTANOIC"),
   9.873 +('x', True, "CLV", "{(2S)-2-[(1S)-1-AMINOETHYL]-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"),
   9.874 +('x', True, "CPC", "2-METHYL-1-METHYLAMINO-CYCLOPROPANE"),
   9.875 +('x', True, "CPI", "6-CARBOXYPIPERIDINE"),
   9.876 +('x', True, "CPV", "5-AMINO-6-CYCLOHEXYL-4-HYDROXY-2-ISOPROPYL-HEXANOIC"),
   9.877 +('x', True, "CQR", "[(4Z)-2-(AMINOMETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
   9.878 +('x', True, "CR0", "[2-(1-AMINO-2-HYDROXYPROPYL)-2-HYDROXY-4-ISOBUTYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETALDEHYDE"),
   9.879 +('x', True, "CR2", "{(4Z)-2-(AMINOMETHYL)-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"),
   9.880 +('x', True, "CR7", "[(4Z)-2-[(1S)-1,5-DIAMINOPENTYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
   9.881 +('x', True, "CR8", "2-[1-AMINO-2-(1H-IMIDAZOL-5-YL)ETHYL]-1-(CARBOXYMETHYL)-4-[(4-OXOCYCLOHEXA-2,5-DIEN-1-YLIDENE)METHYL]-1H-IMIDAZOL-5-OLATE"),
   9.882 +('x', True, "CRF", "[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(1H-INDOL-3-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETALDEHYDE"),
   9.883 +('x', True, "CRG", "[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(3H-IMIDAZOL-4-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETIC"),
   9.884 +('x', True, "CRK", "4-{(Z)-[2-[3-(METHYLSULFANYL)PROPANOYL]-5-OXO-1-(2-OXOETHYL)-1,5-DIHYDRO-4H-IMIDAZOL-4-YLIDENE]METHYL}BENZENOLATE"),
   9.885 +('x', True, "CRO", "{2-[(1R,2R)-1-AMINO-2-HYDROXYPROPYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"),
   9.886 +('x', True, "CRQ", "[2-(3-CARBAMOYL-1-IMINO-PROPYL)-4-(4-HYDROXY-BENZYLIDENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETIC"),
   9.887 +('x', True, "CRW", "[2-(1-AMINOETHYL)-4-METHYLENE-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
   9.888 +('x', True, "CRX", "[2-(1-AMINOETHYL)-2-HYDROXY-4-METHYLENE-5-OXOIMIDAZOLIDIN-1-YL]ACETIC"),
   9.889 +('x', True, "CSH", "[2-(2-HYDROXY-1-METHYL-ETHYL)-4-(1H-IMIDAZOL-4-YLMETHYL)-5-OXO-IMIDAZOLIDIN-1-YL]-ACETIC"),
   9.890 +('x', True, "CSY", "[4-(4-HYDROXY-BENZYL)-2-(2-HYDROXY-1-METHYL-ETHYL)-5-OXO-IMIDAZOLIDIN-1-YL]-ACETIC"),
   9.891 +('x', True, "CUC", "CYCLOHEXYLGLYCYL"),
   9.892 +('x', True, "CYJ", "(Z)-N~6~-[(4R,5S)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)PIPERIDIN-3-YLIDENE]-L-LYSINE"),
   9.893 +('x', True, "D4P", "(2S)-AMINO(4-HYDROXYPHENYL)ACETIC"),
   9.894 +('x', True, "DA2", "NG,NG-DIMETHYL-L-ARGININE"),
   9.895 +('x', True, "DAM", "N-METHYL-ALPHA-BETA-DEHYDROALANINE"),
   9.896 +('x', True, "DCI", "2-METHYL-BUTYLAMINE"),
   9.897 +('x', True, "DCL", "2-AMINO-4-METHYL-PENTAN-1-OL"),
   9.898 +('x', True, "DFI", "2,2-DIFLUOROSTATINE"),
   9.899 +('x', True, "DFO", "2,2-DIFLUORO-3-HYDROSTATINE"),
   9.900 +('x', True, "DHL", "2-AMINO-ETHANETHIOL"),
   9.901 +('x', True, "DHP", "3-DECYL-2,5-DIOXO-4-HYDROXY-3-PYRROLINE"),
   9.902 +('x', True, "DMT", "3-HYDROXY-4,4-DIMETHYL-2-(METHYLAMINO)-6-OCTENOIC"),
   9.903 +('x', True, "DOA", "12-AMINO-DODECANOIC"),
   9.904 +('x', True, "DYG", "(3S)-3-AMINO-3-[(4Z)-1-(CARBOXYMETHYL)-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-IMIDAZOL-2-YL]PROPANOIC"),
   9.905 +('x', True, "EOV", "(3S,6S,9S,12R,15S,18S,21S,24S,30S,33S)-30-ethyl-33-[(1R,2R,4E)-1-hydroxy-2-methylhex-4-en-1-yl]-1,4,7,10,12,15,19,25,28-nonamethyl-3,21-bis(1-methylethyl)-6,9,18,24-tetrakis(2-methylpropyl)-1,4,7,10,13,16,19,22,25,28,31-undecaazacyclotritriacontane-2,5,8,11,14,17,20,23,26,29,32-undecone"),
   9.906 +('x', True, "EYS", "S-SELANYLCYSTEINAL"),
   9.907 +('x', True, "FHL", "(E)-N~6~-[3-CARBOXY-1-(HYDROXYMETHYL)PROPYLIDENE]-L-LYSINE"),
   9.908 +('x', True, "FRD", "1-PHENYL-2-AMINOPROPANE"),
   9.909 +('x', True, "GCM", "GLYCYLMETHYLENE"),
   9.910 +('x', True, "GLM", "1-AMINO-PROPAN-2-ONE"),
   9.911 +('x', True, "GLX", "GLU/GLN"),
   9.912 +('x', True, "GND", "2-AMINO-5-GUANIDINO-PENTANOIC"),
   9.913 +('x', True, "GVL", "O-[(R)-{[(3R)-4-AMINO-3-HYDROXY-2,2-DIMETHYL-4-OXOBUTYL]OXY}(HYDROXY)PHOSPHORYL]-L-SERINE"),
   9.914 +('x', True, "GYC", "[(4Z)-2-[(1R)-1-AMINO-2-MERCAPTOETHYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
   9.915 +('x', True, "GYS", "[(4Z)-2-(1-AMINO-2-HYDROXYETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
   9.916 +('x', True, "HAQ", "5-AMINO-4-OXO-1,2,4,5,6,7-HEXAHYDRO-AZEPINO[3,2,1-HI]INDOLE-2-CARBOXYLIC"),
   9.917 +('x', True, "HCS", "2-AMINO-4-MERCAPTO-BUTYRIC"),
   9.918 +('x', True, "HFA", "ALPHA-HYDROXY-BETA-PHENYL-PROPIONIC"),
   9.919 +('x', True, "HHK", "(2S)-2,8-DIAMINOOCTANOIC"),
   9.920 +('x', True, "HPH", "PHENYLALANINDIOL"),
   9.921 +('x', True, "HV8", "BENZYL-2-AMINO-PARAMETHOXY-BENZYLSTATINE"),
   9.922 +('x', True, "IEY", "2-((1E)-2-(5-IMIDAZOLYL)ETHENYL)-4-(P-HYDROXYBENZYLIDENE)-5-IMIDAZOLINONE"),
   9.923 +('x', True, "IIC", "4-IMIDAZOLMETHYLENE-5-IMIDAZOLONE"),
   9.924 +('x', True, "ISO", "PARA-ISOPROPYLANILINE"),
   9.925 +('x', True, "IVA", "ISOVALERIC"),
   9.926 +('x', True, "L2A", "(2S,5S,8S,11S,15E,20S)-20-AMINO-2-(CARBOXYMETHYL)-11,20-DIMETHYL-5,8-BIS(2-METHYLPROPYL)-3,6,9,21-TETRAOXO-1,4,7,10-TETRAAZACYCLOHENICOS-15-ENE-11-CARBOXYLIC"),
   9.927 +('x', True, "LET", "(Z)-N^6-{3-CARBOXY-1-[(4-CARBOXY-2-OXOBUTOXY)METHYL]PROPYLIDENE}-L-LYSINE"),
   9.928 +('x', True, "LNT", "N-[(2S)-2-AMINO-1,1-DIHYDROXY-4-METHYLPENTYL]-L-THREONINE"),
   9.929 +('x', True, "LOL", "LEUCINOL"),
   9.930 +('x', True, "LOV", "5-AMINO-4-HYDROXY-2-ISOPROPYL-7-METHYL-OCTANOIC"),
   9.931 +('x', True, "LPL", "LEU-HYDROXYETHYLENE-LEU"),
   9.932 +('x', True, "LSO", "(Z)-N~6~-(3-CARBOXY-1-{[(4-CARBOXY-2-OXOBUTYL)SULFONYL]METHYL}PROPYLIDENE)-L-LYSINE"),
   9.933 +('x', True, "LTA", "4,8-DIAMINO-3-HYDROXY-OCTANOIC"),
   9.934 +('x', True, "MCG", "(S)-(ALPHA)-METHYL-4-CARBOXYPHENYLGLYCINE"),
   9.935 +('x', True, "MDH", "N-METHYLDEHYDROBUTYRINE"),
   9.936 +('x', True, "MDO", "{2-[(1S)-1-AMINOETHYL]-5-HYDROXY-4-METHYL-1H-IMIDAZOL-1-YL}ACETIC"),
   9.937 +('x', True, "MF3", "2-AMINO-4-TRIFLUOROMETHYLSULFANYL-BUTYRIC"),
   9.938 +('x', True, "MFC", "5-[1-(3-FLUORO-4-HYDROXY-PHENYL)-METH-(Z)-YLIDENE]-3,5-DIHYDRO-IMIDAZOL-4-ONE"),
   9.939 +('x', True, "MOD", "L-METHIONYL"),
   9.940 +('x', True, "MOR", "N-CARBONYLMORPHOLINE"),
   9.941 +('x', True, "MPH", "(1-AMINO-3-METHYLSULFANYL-PROPYL)-PHOSPHONIC"),
   9.942 +('x', True, "MPJ", "(1-AMINO-3-METHYLSULFANYL-PROPYL)-PHOSPHINIC"),
   9.943 +('x', True, "MSU", "SUCCINIC"),
   9.944 +('x', True, "N2C", "N,S-DIMETHYLCYSTEINE"),
   9.945 +('x', True, "NCY", "N-METHYLCYSTEINE"),
   9.946 +('x', True, "NIT", "4-NITROANILINE"),
   9.947 +('x', True, "NRQ", "{(4Z)-4-(4-HYDROXYBENZYLIDENE)-2-[3-(METHYLTHIO)PROPANIMIDOYL]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"),
   9.948 +('x', True, "NSK", "N-SUCCINYL"),
   9.949 +('x', True, "NYC", "[(4E)-2-[(1R,2S)-1-AMINO-2-HYDROXYPROPYL]-4-(1H-INDOL-3-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
   9.950 +('x', True, "NYG", "[(4Z)-2-[(1S)-1,3-DIAMINO-3-OXOPROPYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
   9.951 +('x', True, "O12", "N~5~-DODECANOYL-L-ORNITHINE"),
   9.952 +('x', True, "OBS", "(Z)-N^6-[(4S,5R)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)-1-HYDROXYDIHYDRO-2H-THIOPYRANIUM-3(4H)-YLIDENE]-L-LYSINE"),
   9.953 +('x', True, "ODA", "9-AMINO-6,10-DIOXO-OCTAHYDRO-PYRIDAZINO[1,2-A][1,2]DIAZEPINE-1-CARBOXYLIC"),
   9.954 +('x', True, "ODS", "4-METHYLPIPERAZIN-1-YL"),
   9.955 +('x', True, "OIC", "OCTAHYDROINDOLE-2-CARBOXYLIC"),
   9.956 +('x', True, "OLE", "2-HYDROXY-4-METHYL-PENTANOIC"),
   9.957 +('x', True, "ONL", "5-OXO-L-NORLEUCINE"),
   9.958 +('x', True, "OTB", "TERTIARY-BUTOXYFORMIC"),
   9.959 +('x', True, "PCE", "2-(3-AMINO-4-CYCLOHEXYL-2-HYDROXY-BUTYL)-PENT-4-YNOIC"),
   9.960 +('x', True, "PDD", "N-(5'-PHOSPHOPYRIDOXYL)-D-ALANINE"),
   9.961 +('x', True, "PDL", "N-(5'-PHOSPHOPYRIDOXYL)-L-ALANINE"),
   9.962 +('x', True, "PGL", "AMINOMETHYLENEPHOSPHINIC"),
   9.963 +('x', True, "PIV", "PIVALIC"),
   9.964 +('x', True, "PTA", "[(1-AMINO-3-METHYL-BUTYL)-HYDROXY-PHOSPHINOYL]-ACETIC"),
   9.965 +('x', True, "PVL", "PYRUVOYL"),
   9.966 +('x', True, "QLG", "[(4Z)-2-[(1Z)-4-AMINO-4-OXOBUTANIMIDOYL]-4-(2-METHYLPROPYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
   9.967 +('x', True, "R2P", "(2R)-2-AMINO-3-(4-HYDROXY-1,2,5-THIADIAZOL-3-YL)PROPANOIC"),
   9.968 +('x', True, "RC7", "{(2R)-4-(4-HYDROXYBENZYL)-2-[2-(1H-IMIDAZOL-4-YL)ETHYL]-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETALDEHYDE"),
   9.969 +('x', True, "RON", "NORVALINE"),
   9.970 +('x', True, "SD2", "N-(SULFANYLACETYL)TYROSYLPROLYLMETHIONINAMIDE"),
   9.971 +('x', True, "SEM", "3-AMINO-4-OXYBENZYL-2-BUTANONE"),
   9.972 +('x', True, "SIC", "(2R)-2-[(3S)-3-AMINO-2,5-DIOXOPYRROLIDIN-1-YL]-3-SULFANYLPROPANOIC"),
   9.973 +('x', True, "SIN", "SUCCINIC"),
   9.974 +('x', True, "SUB", "3-PHENYL-2-{[4-(TOLUENE-4-SULFONYL)-THIOMORPHOLINE-3-CARBONYL]-AMINO}-PROPIONIC"),
   9.975 +('x', True, "SUI", "(3-AMINO-2,5-DIOXO-1-PYRROLIDINYL)ACETIC"),
   9.976 +('x', True, "T66", "2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-THYMINE"),
   9.977 +('x', True, "TA4", "(S)-2-[4-(AMINOMETHYL)-1H-1,2,3-TRIAZOL-1-YL]-4-METHYLPENTANOIC"),
   9.978 +('x', True, "TCQ", "3-((3E)-4-(HYDROXYMETHYL)-6-OXO-3-{[(1S,2R)-2-PHENYLCYCLOPROPYL]IMINO}CYCLOHEXA-1,4-DIEN-1-YL)ALANINE"),
   9.979 +('x', True, "TFA", "TRIFLUOROACETYL"),
   9.980 +('x', True, "THO", "REDUCED"),
   9.981 +('x', True, "TPH", "2-AMINO-3-PHENYL-PROPANE-1,1-DIOL"),
   9.982 +('x', True, "TST", "4-METHYL-2-{[4-(TOLUENE-4-SULFONYL)-THIOMORPHOLINE-3-CARBONYL]-AMINO}-PENTANOIC"),
   9.983 +('x', True, "TYX", "S-(2-ANILINO-2-OXOETHYL)-L-CYSTEINE"),
   9.984 +('x', True, "TZB", "(4S)-2-[(1E)-1-AMINOPROP-1-ENYL]-4,5-DIHYDRO-1,3-THIAZOLE-4-CARBOXYLIC"),
   9.985 +('x', True, "TZO", "1,3-THIAZOLE-4-CARBOXYLIC"),
   9.986 +('x', True, "UN1", "2-AMINOHEXANEDIOIC"),
   9.987 +('x', True, "UN2", "2-AMINO-4,4-DIHYDROXYBUTANOIC"),
   9.988 +('x', True, "UNK", "UNKNOWN"),
   9.989 +('x', True, "VDL", "(2R,3R)-2,3-DIAMINOBUTANOIC"),
   9.990 +('x', True, "VLL", "(2S)-2,3-DIAMINOBUTANOIC"),
   9.991 +('x', True, "VLM", "VALINYLAMINE"),
   9.992 +('x', True, "VMS", "5'O-[N-(L-VALYL)SULPHAMOYL]ADENOSINE"),
   9.993 +('x', True, "VOL", "L-VALINOL"),
   9.994 +('x', True, "X9A", "amino[(3-{(2Z,5S,6S,9R,12S,13R,16R)-5,16-dicarboxy-2-ethylidene-12-[(1E,3E,5S,6R)-6-methoxy-3,5-dimethyl-7-phenylhepta-1,3-dien-1-yl]-1,6,13-trimethyl-3,7,10,14,19-pentaoxo-1,4,8,11,15-pentaazacyclononadecan-9-yl}propyl)amino]methaniminium"),
   9.995 +('x', True, "X9Q", "{(2S)-2-[(1S)-1-AMINOETHYL]-4-BENZYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"),
   9.996 +('x', True, "XCP", "(1S,2S)-2-AMINOCYCLOPENTANECARBOXYLIC"),
   9.997 +('x', True, "XPC", "(3S,4R)-4-AMINOPYRROLIDINE-3-CARBOXYLIC"),
   9.998 +('x', True, "XXY", "2-[(1R,2R)-1-AMINO-2-HYDROXYPROPYL]-1-(CARBOXYMETHYL)-4-(1H-IMIDAZOL-5-YLMETHYL)-1H-IMIDAZOL-5-OLATE"),
   9.999 +('x', True, "XYG", "[(4Z)-2-[(1Z)-ETHANIMIDOYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"),
  9.1000 +('x', True, "ZFB", "(3R)-3-{[(BENZYLOXY)CARBONYL]AMINO}-2-OXO-4-PHENYLBUTANE-1-DIAZONIUM"),
  9.1001 +('y', True, "0A1", "O-METHYL-L-TYROSINE"),
  9.1002 +('y', True, "1TY", "3-{(3E)-4-HYDROXY-6-OXO-3-[(2-PHENYLETHYL)IMINO]CYCLOHEXA-1,4-DIEN-1-YL}ALANINE"),
  9.1003 +('y', True, "2TY", "2-HYDROXY-5-{[(1E)-2-PHENYLETHYLIDENE]AMINO}-L-TYROSINE"),
  9.1004 +('y', True, "4BF", "4-BROMO-L-PHENYLALANINE"),
  9.1005 +('y', True, "AZY", "3-AZIDO-L-TYROSINE"),
  9.1006 +('y', True, "B3Y", "(3S)-3-AMINO-4-(4-HYDROXYPHENYL)BUTANOIC"),
  9.1007 +('y', True, "DBY", "3,5"),
  9.1008 +('y', True, "DPQ", "(S)-2-AMINO-3-(4-HYDROXY-3-OXOCYCLOHEXA-1,4-DIENYL)PROPANOIC"),
  9.1009 +('y', True, "DTY", "D-TYROSINE"),
  9.1010 +('y', True, "ESB", "3-[(3E)-3-(ETHYLIMINO)-4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIEN-1-YL]-L-ALANINE"),
  9.1011 +('y', True, "FLT", "FLUOROMALONYL"),
  9.1012 +('y', True, "FTY", "DEOXY-DIFLUOROMETHELENE-PHOSPHOTYROSINE"),
  9.1013 +('y', True, "IYR", "3-IODO-TYROSINE"),
  9.1014 +('y', True, "MBQ", "2-HYDROXY-5-({1-[(4-METHYLPHENOXY)METHYL]-3-OXOPROP-1-ENYL}AMINO)-L-TYROSINE"),
  9.1015 +('y', True, "MTY", "META-TYROSINE"),
  9.1016 +('y', True, "NBQ", "2-HYDROXY-5-({1-[(2-NAPHTHYLOXY)METHYL]-3-OXOPROP-1-ENYL}AMINO)TYROSINE"),
  9.1017 +('y', True, "NIY", "META-NITRO-TYROSINE"),
  9.1018 +('y', True, "NTY", "BETA"),
  9.1019 +('y', True, "OTY", "2-AMINO-3-(4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIENYL)PROPANOIC"),
  9.1020 +('y', True, "PAQ", "2-OXY-4-HYDROXY-5-(2-HYDRAZINOPYRIDINE)PHENYLALANINE"),
  9.1021 +('y', True, "PTH", "CE1-METHYLENE-HYDROXY-PHOSPHOTYROSINE"),
  9.1022 +('y', True, "PTM", "ALPHA-METHYL-O-PHOSPHOTYROSINE"),
  9.1023 +('y', True, "PTR", "O-PHOSPHOTYROSINE"),
  9.1024 +('y', True, "STY", "TYROSINE-O-SULPHONIC"),
  9.1025 +('y', True, "TPQ", "5-(2-CARBOXY-2-AMINOETHYL)-2-HYDROXY-1,4-BENZOQUINONE"),
  9.1026 +('y', True, "TTS", "3-((3E)-4-HYDROXY-3-{[2-(4-HYDROXYPHENYL)ETHYL]IMINO}-6-OXOCYCLOHEXA-1,4-DIEN-1-YL)ALANINE"),
  9.1027 +('y', True, "TY2", "3-AMINO-L-TYROSINE"),
  9.1028 +('y', True, "TY3", "3-HYDROXY-L-TYROSINE"),
  9.1029 +('y', True, "TYB", "TYROSINAL"),
  9.1030 +('y', True, "TYI", "3,5-DIIODOTYROSINE"),
  9.1031 +('y', True, "TYN", "AMINOBENZOFURAZAN-O-TYROSINE"),
  9.1032 +('y', True, "TYO", "(4Z,6E)-2-AMINO-7-HYDROPEROXY-4-[(E)-2-HYDROXYVINYL]HEPTA-4,6-DIENOIC"),
  9.1033 +('y', True, "TYQ", "3-AMINO-6-HYDROXY-TYROSINE"),
  9.1034 +('y', True, "TYS", "O-SULFO-L-TYROSINE"),
  9.1035 +('y', True, "TYT", "TYROSINE"),
  9.1036 +('y', True, "TYY", "3-(4-HYDROXY-3-IMINO-6-OXO-CYCLOHEXA-1,4-DIENYL)-ALANINE"),
  9.1037 +('y', True, "YOF", "3-FLUOROTYROSINE"),
  9.1038 +)
  9.1039 +
  9.1040 +rna = (
  9.1041 +('A', False, "A", "ADENOSINE-5'-MONOPHOSPHATE"),
  9.1042 +('C', False, "C", "CYTIDINE-5'-MONOPHOSPHATE"),
  9.1043 +('G', False, "G", "GUANOSINE-5'-MONOPHOSPHATE"),
  9.1044 +('U', False, "U", "URIDINE-5'-MONOPHOSPHATE"),
  9.1045 +('N', False, "", "Undefined RNA Nucelotide"),
  9.1046 +('a', True, "12A", "2-METHYLTHIO-N6-(AMINOCARBONYL-L-THREONYL)-ADENOSINE-5'-MONOPHOSPHATE"),
  9.1047 +('a', True, "1MA", "6-HYDRO-1-METHYLADENOSINE-5'-MONOPHOSPHATE"),
  9.1048 +('a', True, "2MA", "2-METHYLADENOSINE-5'-MONOPHOSPHATE"),
  9.1049 +('a', True, "5FA", "ADENOSINE-5'-PENTAPHOSPHATE"),
  9.1050 +('a', True, "6IA", "N6-ISOPENTENYL-ADENOSINE-5'-MONOPHOSPHATE"),
  9.1051 +('a', True, "6MA", "N6-METHYL-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"),
  9.1052 +('a', True, "6MC", "CIS-N6-METHYL-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"),
  9.1053 +('a', True, "6MT", "TRANS-N6-METHYL-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"),
  9.1054 +('a', True, "8AN", "3'-amino-3'-deoxyadenosine 5'-(dihydrogen phosphate)"),
  9.1055 +('a', True, "A23", "ADENOSINE-5'-PHOSPHATE-2',3'-CYCLIC PHOSPHATE"),
  9.1056 +('a', True, "A2L", "3'-O-METHYOXYETHYL-ADENOSINE 5'-MONOPHOSPHATE"),
  9.1057 +('a', True, "A2M", "2'-O-METHYL-ADENOSINE-5'-MONOPHOSPHATE"),
  9.1058 +('a', True, "A39", "2'-O-METHYL-ADENOSINE 5'-MONOPHOSPHATE"),
  9.1059 +('a', True, "A3P", "ADENOSINE-3'-5'-DIPHOSPHATE"),
  9.1060 +('a', True, "A44", "2'-O-METHYOXYETHYL-ADENOSINE 5'-MONOPHOSPHATE"),
  9.1061 +('a', True, "AET", "N-[N-(9-B-D-RIBOFURANOSYLPURIN-6-YL)METHYLCARBAMOYL]THREONINE-5'-MONOPHOSPHATE"),
  9.1062 +('a', True, "AVC", "ADENOSINE-5'-MONOPHOSPHATE-2',3'-VANADATE"),
  9.1063 +('a', True, "LCA", "[(1R,3R,4R,7S)-7-HYDROXY-3-(ADENIN-9-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"),
  9.1064 +('a', True, "MA6", "6N-DIMETHYLADENOSINE-5'-MONOPHOSHATE"),
  9.1065 +('a', True, "MAD", "6-HYDRO-1-METHYLADENOSINE-5'-MONOPHOSPHATE"),
  9.1066 +('a', True, "MGQ", "7-BENZYL"),
  9.1067 +('a', True, "MIA", "2-METHYLTHIO-N6-ISOPENTENYL-ADENOSINE-5'-MONOPHOSPHATE"),
  9.1068 +('a', True, "MTU", "9-BETA-D-RIBOFURANOSYL-9H-PURIN-2-AMINE"),
  9.1069 +('a', True, "P5P", "PURINE RIBOSIDE-5'-MONOPHOSPHATE"),
  9.1070 +('a', True, "PPU", "PUROMYCIN-5'-MONOPHOSPHATE"),
  9.1071 +('a', True, "PR5", "PURINE RIBOSIDE-5'-MONOPHOSPHATE"),
  9.1072 +('a', True, "PU", "PUROMYCIN-N-AMINOPHOSPHONIC"),
  9.1073 +('a', True, "RIA", "2'-O-[(5'-PHOSPHO)RIBOSYL]ADENOSINE-5'-MONOPHOSPHATE"),
  9.1074 +('a', True, "SRA", "ADENOSINE -5'-THIO-MONOPHOSPHATE"),
  9.1075 +('a', True, "T6A", "N-[N-(9-B-D-RIBOFURANOSYLPURIN-6-YL)CARBAMOYL]THREONINE-5'-MONOPHOSPHATE"),
  9.1076 +('a', True, "ZAD", "(S)-1'-(2',3'-DIHYDROXYPROPYL)-ADENINE"),
  9.1077 +('c', True, "10C", "4-AMINO-1-{2,5-ANHYDRO-4-[(PHOSPHONOOXY)METHYL]-ALPHA-L-LYXOFURANOSYL}PYRIMIDIN-2(1H)-ONE"),
  9.1078 +('c', True, "1SC", "5'-O-THIOPHOSPHONOCYTIDINE"),
  9.1079 +('c', True, "4OC", "4N,O2'-METHYLCYTIDINE-5'-MONOPHOSPHATE"),
  9.1080 +('c', True, "5IC", "5-IODO-CYTIDINE-5'-MONOPHOSPHATE"),
  9.1081 +('c', True, "5MC", "5-METHYLCYTIDINE-5'-MONOPHOSPHATE"),
  9.1082 +('c', True, "A5M", "2'-AMINE-CYTIDINE-5'-MONOPHOSPHATE"),
  9.1083 +('c', True, "C25", "CYTIDINE-5'-MONOPHOSPHATE"),
  9.1084 +('c', True, "C2L", "5-METHYL-3'-O-METHOXYETHYL CYTIDINE 5'-MONOPHOSPHATE"),
  9.1085 +('c', True, "C31", "2'-O-3-AMINOPROPYL CYTIDINE-5'-MONOPHOSPHATE"),
  9.1086 +('c', True, "C43", "2'-O-METHYOXYETHYL-CYTIDINE-5'-MONOPHOSPHATE"),
  9.1087 +('c', True, "CBV", "5-BROMOCYTIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
  9.1088 +('c', True, "CCC", "CYTIDINE-5'-PHOSPHATE-2',3'-CYCLIC PHOSPHATE"),
  9.1089 +('c', True, "CH", "N3-PROTONATED CYTIDINE-5'-MONOPHOSPHATE"),
  9.1090 +('c', True, "CSF", "CYTIDINE-5'-MONOPHOSPHATE-3-FLUORO-N-ACETYL-NEURAMINIC ACID"),
  9.1091 +('c', True, "IC", "ISOCYTIDINE-5'-MONOPHOSPHATE"),
  9.1092 +('c', True, "LC", "L-CYTIDINE-5'-MONOPHOSPHATE"),
  9.1093 +('c', True, "M5M", "2'-(N-ACETAMIDE)-CYTIDINE-5'-MONOPHOSPHATE"),
  9.1094 +('c', True, "N5M", "5-nitrocytidine 5'-(dihydrogen phosphate)"),
  9.1095 +('c', True, "OMC", "O2'-METHYLYCYTIDINE-5'-MONOPHOSPHATE"),
  9.1096 +('c', True, "PMT", "PHOSPHORIC"),
  9.1097 +('c', True, "S4C", "4'-THIO-4'-DEOXY-CYTOSINE-5'-MONOPHOSPHATE"),
  9.1098 +('c', True, "ZCY", "(S)-1'-(2',3'-DIHYDROXYPROPYL)-CYTOSINE"),
  9.1099 +('g', True, "1MG", "1N-METHYLGUANOSINE-5'-MONOPHOSPHATE"),
  9.1100 +('g', True, "2MG", "2N-METHYLGUANOSINE-5'-MONOPHOSPHATE"),
  9.1101 +('g', True, "7MG", "7N-METHYL-8-HYDROGUANOSINE-5'-MONOPHOSPHATE"),
  9.1102 +('g', True, "CG1", "5'-O-[(R)-HYDROXY(METHOXY)PHOSPHORYL]GUANOSINE"),
  9.1103 +('g', True, "G25", "GUANOSINE-5'-MONOPHOSPHATE"),
  9.1104 +('g', True, "G2L", "3'-O-METHYOXYETHYL-GUANOSINE-5'-MONOPHOSPHATE"),
  9.1105 +('g', True, "G46", "2'-DEOXY-GUANOSINE-5'-MONOTHIOPHOSPHATE"),
  9.1106 +('g', True, "G48", "2'-O-METHYOXYETHYL-GUANOSINE-5'-MONOPHOSPHATE"),
  9.1107 +('g', True, "G7M", "N7-METHYL-GUANOSINE-5'-MONOPHOSPHATE"),
  9.1108 +('g', True, "GAO", "GUANINE ARABINOSE-5'-PHOSPHATE"),
  9.1109 +('g', True, "GDP", "GUANOSINE-5'-DIPHOSPHATE"),
  9.1110 +('g', True, "GH3", "3'-DEOXY-GUANOSINE-5'-TRIPHOSPHATE"),
  9.1111 +('g', True, "GOM", "GLUTAMOL-AMP"),
  9.1112 +('g', True, "GTP", "GUANOSINE-5'-TRIPHOSPHATE"),
  9.1113 +('g', True, "IG", "ISOGUANOSINE-5'-MONOPHOSPHATE"),
  9.1114 +('g', True, "KAG", "2'-DEOXY-N-[(1S)-1-METHYL-3-OXOPROPYL]GUANOSINE 5'-PHOSPHATE"),
  9.1115 +('g', True, "LG", "L-GUANOSINE-5'-MONOPHOSPHATE"),
  9.1116 +('g', True, "M2G", "N2-DIMETHYLGUANOSINE-5'-MONOPHOSPHATE"),
  9.1117 +('g', True, "MGV", "P-FLUORO-7-BENZYL"),
  9.1118 +('g', True, "N6G", "((2R,3S,4R,5S)-5-(2,6-DIAMINO-9H-PURIN-9-YL)-3,4-DIHYDROXY-TETRAHYDROFURAN-2-YL)METHYL"),
  9.1119 +('g', True, "OMG", "O2'-METHYLGUANOSINE-5'-MONOPHOSPHATE"),
  9.1120 +('g', True, "PGP", "GUANOSINE-3',5'-DIPHOSPHATE"),
  9.1121 +('g', True, "QUO", "2-AMINO-7-DEAZA-(2'',3''-DIHYDROXY-CYCLOPENTYLAMINO)-GUANOSINE-5'-MONOPHOSPHATE"),
  9.1122 +('g', True, "TPG", "2,2,7-TRIMETHYL-GUANOSINE-5'-TRIPHOSPHATE-5'-GUANOSINE"),
  9.1123 +('g', True, "XTS", "9-[(2R,3R,4S,5R)-3,4-DIHYDROXY-5-(HYDROXYMETHYL)OXOLAN-2-YL]-3H-PURINE-2,6-DIONE"),
  9.1124 +('g', True, "YG", "WYBUTOSINE"),
  9.1125 +('g', True, "YYG", "4-(3-[5-O-PHOSPHONORIBOFURANOSYL]-4,6-DIMETHYL-8-OXO-4,8-DIHYDRO-3H-1,3,4,5,7A-PENTAAZA-S-INDACEN-YLAMINO-BUTYRIC)"),
  9.1126 +('g', True, "ZGU", "(S)-1'-(2',3'-DIHYDROXYPROPYL)-GUANINE"),
  9.1127 +('n', True, "128", "SPIRO(2,4,6-TRINITROBENZENE[1,2A]-2O',3O'-METHYLENE-ADENINE-TRIPHOSPHATE"),
  9.1128 +('n', True, "1PR", "PHOSPHORIC"),
  9.1129 +('n', True, "2SA", "2-[9-(3,4-DIHYDROXY-5-PHOSPHONOOXYMETHYL-TETRAHYDRO-FURAN-2-YL)-9H-PURIN-6-YLAMINO]-SUCCINIC"),
  9.1130 +('n', True, "4AC", "N(4)-ACETYLCYTIDINE-5'-MONOPHOSPHATE"),
  9.1131 +('n', True, "4TA", "P1-(5'-ADENOSYL)P4-(5'-(2'-DEOXY-THYMIDYL))TETRAPHOSPHATE"),
  9.1132 +('n', True, "6MZ", "N6-METHYLADENOSINE-5'-MONOPHOSPHATE"),
  9.1133 +('n', True, "ADX", "ADENOSINE-5'-PHOSPHOSULFATE"),
  9.1134 +('n', True, "BMP", "6-HYDROXYURIDINE-5'-PHOSPHATE"),
  9.1135 +('n', True, "BT5", "BIOTINYL-5-AMP"),
  9.1136 +('n', True, "CM0", "5-(CARBOXYMETHOXY) URIDINE-5'-MONOPHOSPHATE"),
  9.1137 +('n', True, "CS8", "3-THIAOCTANOYL-COENZYME"),
  9.1138 +('n', True, "DBM", "9-(6-DEOXY-BETA-D-ALLOFURANOSYL)-6-METHYLPURINE"),
  9.1139 +('n', True, "ENA", "ETHENO-NAD"),
  9.1140 +('n', True, "ENP", "ETHENO-NADP"),
  9.1141 +('n', True, "FA5", "ADENOSINE-5'-[PHENYLALANINYL-PHOSPHATE]"),
  9.1142 +('n', True, "FAI", "5-(FORMYLAMINO)-1-(5-O-PHOSPHONO-BETA-D-RIBOFURANOSYL)-1H-IMIDAZOLE-4-CARBOXAMIDE"),
  9.1143 +('n', True, "FMU", "5-FLUORO-5-METHYLURIDINE-5'-MONOPHOSPHATE"),
  9.1144 +('n', True, "G4P", "GUANOSINE-5',3'-TETRAPHOSPHATE"),
  9.1145 +('n', True, "I", "INOSINIC"),
  9.1146 +('n', True, "IRN", "1-(5-O-PHOSPHONO-BETA-D-RIBOFURANOSYL)-1H-IMIDAZOLE"),
  9.1147 +('n', True, "JW5", "6-(HYDROXYMETHYL)URIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
  9.1148 +('n', True, "LMS", "[(2R,3S,4R,5R)-5-(6-AMINO-9H-PURIN-9-YL)-3,4-DIHYDROXYTETRAHYDRO-2-FURANYL]METHYL"),
  9.1149 +('n', True, "N", "ANY 5'-MONOPHOSPHATE NUCLEOTIDE"),
  9.1150 +('n', True, "NF2", "(1S)-1,4-ANHYDRO-1-(2,4-DIFLUORO-5-METHYLPHENYL)-5-O-PHOSPHONO-D-RIBITOL"),
  9.1151 +('n', True, "OAD", "2'-O-ACETYL ADENOSINE-5-DIPHOSPHORIBOSE"),
  9.1152 +('n', True, "ODP", "4-OXO-NICOTINAMIDE-ADENINE"),
  9.1153 +('n', True, "P1P", "3-{[OXIDO(OXO)PHOSPHINO]OXY}PROPAN-1-OLATE"),
  9.1154 +('n', True, "PQ1", "PHOSPHORIC"),
  9.1155 +('n', True, "PUY", "PUROMYCIN"),
  9.1156 +('n', True, "PYY", "D-RIBOFURANOSYL-BENZENE-5'-MONOPHOSPHATE"),
  9.1157 +('n', True, "RT", "RIBOSYLTHYMINE-5'-MONOPHOSPHATE"),
  9.1158 +('n', True, "RTP", "RIBAVIRIN"),
  9.1159 +('n', True, "SMT", "2'-[(METHYLTHIO)ETHYLOXY]-THYMIDINE-5'-MONOPHOSPHATE"),
  9.1160 +('n', True, "T23", "2'-O-METHYL-3'-METHYL-3'-DEOXY-ARABINOFURANOSYL-THYMINE-5'-PHOSPHATE"),
  9.1161 +('n', True, "T2S", "THYMIDINE-5'-DITHIOPHOSPHORATE"),
  9.1162 +('n', True, "T38", "2'-O-ETHOXYMETHYLENE THYMIDINE 5'-MONOPHOSPHATE"),
  9.1163 +('n', True, "T39", "2'-O-METHOXYETHYLENE THYMIDINE 5'-MONOPHOSPHATE"),
  9.1164 +('n', True, "T41", "1-(2-O-METHYL-BETA-D-ARABINOFURANOSYL)THYMIDINE 5'-MONOPHOSPHATE"),
  9.1165 +('n', True, "TAL", "9-(6-DEOXY-ALPHA-L-TALOFURANOSYL)-6-METHYLPURINE"),
  9.1166 +('n', True, "TLB", "2'-O,3'-C-METHYLENE-ARABINOFURANOSYL-THYMINE-5'-MONOPHOSPHATE"),
  9.1167 +('n', True, "TYU", "TETRAHYDROURIDINE"),
  9.1168 +('n', True, "UDP", "URIDINE-5'-DIPHOSPHATE"),
  9.1169 +('n', True, "ZHP", "(S)-1'-(2',3'-DIHYDROXYPROPYL)-HYDROXYPYRIDONE"),
  9.1170 +('n', True, "ZTH", "(S)-1'-(2',3'-DIHYDROXYPROPYL)-THYMINE"),
  9.1171 +('u', True, "125", "2'-O-FLUOROETHYL-5-METHYL-URIDINE-5'-MONOPHOSPHATE"),
  9.1172 +('u', True, "126", "2'-O-METHYL-[TRI(OXYETHYL)]-5-METHYL-URIDINE-5'-MONOPHOSPHATE"),
  9.1173 +('u', True, "127", "2'-O-AMINOOXY-ETHYL-5-METHYL-URIDINE-5'-MONOPHOSPHATE"),
  9.1174 +('u', True, "2AU", "2'-AMINOURIDINE"),
  9.1175 +('u', True, "2MU", "2',5-DIMETHYLURIDINE-5'-MONOPHOSPHATE"),
  9.1176 +('u', True, "4SU", "4-THIOURIDINE-5'-MONOPHOSPHATE"),
  9.1177 +('u', True, "5BU", "5-BROMO-URIDINE-5'-MONOPHOSPHATE"),
  9.1178 +('u', True, "5FU", "5-FLUORO-URIDINE-5'-MONOPHOSPHATE"),
  9.1179 +('u', True, "5MU", "5-METHYLURIDINE 5'-MONOPHOSPHATE"),
  9.1180 +('u', True, "70U", "5-(O-METHYLACETO)-2-THIO-2-DEOXY-URIDINE-5'-MONOPHOSPHATE"),
  9.1181 +('u', True, "CNU", "5-CYANO-URIDINE-5'-MONOPHOSPHATE"),
  9.1182 +('u', True, "DHU", "5,6-DIHYDROURIDINE-5'-PHOSPHATE"),
  9.1183 +('u', True, "FHU", "(5S,6R)-5-FLUORO-6-HYDROXY-PSEUDOURIDINE-5'-MONOPHOSPHATE"),
  9.1184 +('u', True, "H2U", "5,6-DIHYDROURIDINE-5'-MONOPHOSPHATE"),
  9.1185 +('u', True, "IU", "5-IODOURIDINE-5'-MONOPHOSPHATE"),
  9.1186 +('u', True, "LHU", "L-URIDINE-5'-MONOPHOSPHATE"),
  9.1187 +('u', True, "MEP", "PHOSPHORIC"),
  9.1188 +('u', True, "MNU", "(2R,4S)-1-[(4R)-3,4-DIHYDROXYTETRAHYDROFURAN-2-YL]-5-[(METHYLAMINO)METHYL]-1,2,3,4-TETRAHYDROPYRIMIDINE-2,4-DIOL-5'-MONOPHOSPHATE"),
  9.1189 +('u', True, "OMU", "O2'-METHYLURIDINE 5'-MONOPHOSPHATE"),
  9.1190 +('u', True, "ONE", "1-(BETA-D-RIBOFURANOSYL)-PYRIDIN-4-ONE-5'-PHOSPHATE"),
  9.1191 +('u', True, "PSU", "PSEUDOURIDINE-5'-MONOPHOSPHATE"),
  9.1192 +('u', True, "PYO", "1-(BETA-D-RIBOFURANOSYL)-PYRIMIDIN-2-ONE-5'-PHOSPHATE"),
  9.1193 +('u', True, "S4U", "4-THIOURIDINE-5'-PHOSPHATE"),
  9.1194 +('u', True, "SSU", "URIDINE-5'-PHOSPHOROTHIOATE"),
  9.1195 +('u', True, "SUR", "1-(BETA-D-RIBOFURANOSYL)-2-THIO-URACIL-5'-PHOSPHATE"),
  9.1196 +('u', True, "T31", "THYMIDINE 5'-MONOTHIOPHOSPHATE"),
  9.1197 +('u', True, "U25", "URIDINE-5'-MONOPHOSPHATE"),
  9.1198 +('u', True, "U2L", "5-METHYL-3'-O-METHOXYETHYL URIDINE-5'-MONOPHOSPHATE"),
  9.1199 +('u', True, "U2P", "PHOSPHORIC"),
  9.1200 +('u', True, "U31", "2'-O-3-AMINOPROPYL 2'-DEOXYURIDINE-5'-MONOPHOSPHATE"),
  9.1201 +('u', True, "U34", "URIDINE 5'-MONOMETHYLPHOSPHATE"),
  9.1202 +('u', True, "U36", "2'-O-METHYOXYETHYL-URIDINE-5'-MONOPHOSPHATE"),
  9.1203 +('u', True, "U37", "URIDINE 5'-MONOTHIOPHOSPHATE"),
  9.1204 +('u', True, "U8U", "5-METHYLAMINOMETHYL-2-THIOURIDINE-5'-MONOPHOSPHATE"),
  9.1205 +('u', True, "UAR", "URACIL ARABINOSE-5'-PHOSPHATE"),
  9.1206 +('u', True, "UD5", "5-FLUOROURIDINE"),
  9.1207 +('u', True, "UMP", "2'-DEOXYURIDINE 5'-MONOPHOSPHATE"),
  9.1208 +('u', True, "UR3", "3-METHYLURIDINE-5'-MONOPHOSHATE"),
  9.1209 +('u', True, "URD", "1-((2R,3R,4S,5R)-TETRAHYDRO-3,4-DIHYDROXY-5-(HYDROXYMETHYL)FURAN-2-YL)PYRIDINE-2,4(1H,3H)-DIONE"),
  9.1210 +)
  9.1211 +
  9.1212 +# vim: set et ts=4 sts=4 sw=4:
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/allpy/data/codes_template.txt	Tue Jan 25 16:03:00 2011 +0300
    10.3 @@ -0,0 +1,17 @@
    10.4 +"""Tables of monomer codes.
    10.5 +
    10.6 +`dna`, `rna`, `protein` are lists of all known codes for monomers of the given
    10.7 +type. Each of them is a list of tuples of the form:
    10.8 +
    10.9 +    ( 1-letter code, is-modified?, 3-letter-code, fullname )
   10.10 +
   10.11 +`3-letter-code` is the code used in PDB (it may actually be one or
   10.12 +two letters)
   10.13 +
   10.14 +"""
   10.15 +
   10.16 +protein = %(protein)s
   10.17 +dna = %(dna)s
   10.18 +rna = %(rna)s
   10.19 +
   10.20 +# vim: set et ts=4 sts=4 sw=4:
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/allpy/data/mkcodes.py	Tue Jan 25 16:03:00 2011 +0300
    11.3 @@ -0,0 +1,96 @@
    11.4 +import os
    11.5 +import argparse
    11.6 +from pprint import pformat
    11.7 +
    11.8 +def rel(*x):
    11.9 +    return os.path.join(os.path.abspath(os.path.dirname(__file__)), *x)
   11.10 +
   11.11 +p = argparse.ArgumentParser(
   11.12 +description='Components.cif to codes.py converter',
   11.13 +epilog='',
   11.14 +formatter_class=argparse.ArgumentDefaultsHelpFormatter,
   11.15 +)
   11.16 +
   11.17 +r = argparse.FileType('r')
   11.18 +w = argparse.FileType('w')
   11.19 +
   11.20 +p.add_argument('-v','--version',action='version',version='%(prog)s 1.0')
   11.21 +p.add_argument('-i',help='input components.cif',metavar='FILE',type=r,
   11.22 +    required=True)
   11.23 +p.add_argument('-o',help='output codes.py',metavar='FILE',type=w,
   11.24 +    default=rel('codes.py'))
   11.25 +p.add_argument('-t',help='Template for codes.py',metavar='FILE',type=r,
   11.26 +    default=rel('codes_template.txt'))
   11.27 +
   11.28 +try:
   11.29 +    args = p.parse_args()
   11.30 +except Exception, t:
   11.31 +    print t
   11.32 +    exit()
   11.33 +
   11.34 +AAbank = {'ALA':'A', 'ARG':'R', 'ASN':'N', 'ASP':'D', 'CYS':'C',
   11.35 +        'GLN':'Q', 'GLU':'E', 'GLY':'G', 'HIS':'H', 'ILE':'I',
   11.36 +        'LEU':'L', 'LYS':'K', 'MET':'M', 'PHE':'F', 'PRO':'P',
   11.37 +        'SER':'S', 'THR':'T', 'TRP':'W', 'TYR':'Y', 'VAL':'V',
   11.38 +        'DA' :'A', 'DT' :'T', 'DG' :'G', 'DC' :'C',
   11.39 +        'A':  'A', 'U'  :'U', 'G'  :'G', 'C'  :'C'}
   11.40 +
   11.41 +protein = []
   11.42 +dna = []
   11.43 +rna = []
   11.44 +
   11.45 +cif_entry = {}
   11.46 +
   11.47 +for line in args.i:
   11.48 +    line = line.strip()
   11.49 +    if line == '#' and cif_entry:
   11.50 +        try:
   11.51 +            monomer_type = cif_entry['_chem_comp.type'].strip()
   11.52 +            if "PEPTIDE" in monomer_type:
   11.53 +                container = protein
   11.54 +            elif "DNA" in monomer_type:
   11.55 +                container = dna
   11.56 +            elif "RNA" in monomer_type:
   11.57 +                container = rna
   11.58 +            else:
   11.59 +                continue
   11.60 +            code1 = cif_entry['_chem_comp.one_letter_code'].strip().lower()
   11.61 +            if code1 == '?':
   11.62 +                parent = cif_entry['_chem_comp.mon_nstd_parent_comp_id']
   11.63 +                parent = parent.strip().upper()
   11.64 +                if parent in AAbank:
   11.65 +                    code1 = AAbank[parent].lower()
   11.66 +                else:
   11.67 +                    if container == protein:
   11.68 +                        code1 = 'x'
   11.69 +                    else:
   11.70 +                        code1 = 'n'
   11.71 +            code3 = cif_entry['_chem_comp.three_letter_code'].strip().upper()
   11.72 +            name = cif_entry['_chem_comp.name'].strip()
   11.73 +            if name[0] == '"' and name[-1] == '"':
   11.74 +                name = name[1:-1]
   11.75 +            modified = code3 not in AAbank
   11.76 +
   11.77 +            container.append((code1, modified, code3, name))
   11.78 +
   11.79 +            cif_entry = {} # for new entry
   11.80 +        except:
   11.81 +            pass
   11.82 +    else:
   11.83 +        key_value = line.split(' ', 1)
   11.84 +        if len(key_value) == 2:
   11.85 +            key, value = key_value
   11.86 +            cif_entry[key] = value
   11.87 +
   11.88 +protein.append(('X', False, "", "Undefined Aminoacid"))
   11.89 +dna.append(('N', False, "", "Undefined DNA Nucelotide"))
   11.90 +rna.append(('N', False, "", "Undefined RNA Nucelotide"))
   11.91 +
   11.92 +protein.sort()
   11.93 +dna.sort()
   11.94 +rna.sort()
   11.95 +
   11.96 +template = args.t.read()
   11.97 +args.o.write(template % {'protein': pformat(protein, width=1024),
   11.98 +    'dna': pformat(dna, width=1024),
   11.99 +    'rna': pformat(rna, width=1024)})
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/allpy/dna.py	Tue Jan 25 16:03:00 2011 +0300
    12.3 @@ -0,0 +1,21 @@
    12.4 +import base
    12.5 +import data.codes
    12.6 +
class Monomer(base.Monomer):
    """DNA monomers: nucleotides."""
    type = 'dna'
    # Lookup tables declared on this class itself as fresh, empty dicts.
    # NOTE(review): presumably filled by _initialize() below and keyed by
    # 1-letter code, 3-letter code and full name -- confirm in base.Monomer.
    by_code1 = {}
    by_code3 = {}
    by_name = {}
# Hand the DNA code table to the class; _initialize is defined in base.
Monomer._initialize(data.codes.dna)
   12.14 +
class Sequence(base.Sequence):
    """Sequence whose monomer type is the DNA `Monomer` of this module."""
    monomer_type = Monomer
   12.17 +
class Alignment(base.Alignment):
    """Alignment whose sequence type is the DNA `Sequence` of this module."""
    sequence_type = Sequence
   12.20 +
class Block(Alignment, base.Block):
    """Block combining the DNA `Alignment` of this module with `base.Block`."""
    pass
   12.23 +
   12.24 +# vim: set ts=4 sts=4 sw=4 et:
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/allpy/fasta.py	Tue Jan 25 16:03:00 2011 +0300
    13.3 @@ -0,0 +1,43 @@
    13.4 +import util
    13.5 +
    13.6 +def parse_file(file):
    13.7 +    """Parse fasta file, remove spaces and newlines from sequence bodies.
    13.8 +
    13.9 +    Return a list of tuples (name, description, sequence_body).
   13.10 +    """
   13.11 +    sequences = []
   13.12 +    for part in file.read().split("\n>"):
   13.13 +        header, _, body = part.partition("\n")
   13.14 +        header = header.lstrip(">").strip()
   13.15 +        name, _, description = header.partition(" ")
   13.16 +        name = name.strip()
   13.17 +        description = description.strip()
   13.18 +        body = util.remove_each(body, " \n\r\t\v")
   13.19 +        sequences.append((name, description, body))
   13.20 +    return sequences
   13.21 +
   13.22 +def save_file(out_file, string, name, description='', long_line=70):
   13.23 +    """ Saves given string to out_file in fasta_format
   13.24 +
   13.25 +    Splits long lines to substrings of length=long_line
   13.26 +    To prevent this, set long_line=None
   13.27 +    """
   13.28 +    if description:
   13.29 +        name += " " + description
   13.30 +    out_file.write(">%s\n" % name)
   13.31 +    if long_line:
   13.32 +        for i in range(0, len(string) // long_line + 1):
   13.33 +            out_file.write("%s \n" % string[i*long_line : i*long_line + long_line])
   13.34 +    else:
   13.35 +        out_file.write("%s \n" % string)
   13.36 +
   13.37 +def determine_long_line(in_file):
   13.38 +    """ Returns maximum sequence line length in fasta file """
   13.39 +    sequences = in_file.read().split('>')
   13.40 +    for sequence in sequences[1:]:
   13.41 +        lines = sequence.split('\n')[1:]
   13.42 +        if len(lines) >= 2:
   13.43 +            return len(lines[0].strip())
   13.44 +    return 70
   13.45 +
   13.46 +# vim: set ts=4 sts=4 sw=4 et:
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/allpy/graph.py	Tue Jan 25 16:03:00 2011 +0300
    14.3 @@ -0,0 +1,288 @@
    14.4 +# -*- coding: utf-8 -*-
    14.5 +
    14.6 +from datetime import datetime, timedelta
    14.7 +from copy import copy
    14.8 +
class TimeoutError(Exception):
    """Raised by Graph.bron_kerbosh when its time limit is exceeded."""
    pass
   14.11 +
   14.12 +
   14.13 +
   14.14 +class Graph(object):
   14.15 +    """ Undirected weighted graph
   14.16 +
   14.17 +    Data:
   14.18 +    nodes -- set of elements
   14.19 +    lines -- {line: cost}.
   14.20 +        line is frozenset([e1, e2])
   14.21 +        cost is float in (0, 1] or 1 (if all lines are equal)
   14.22 +
   14.23 +    >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1})
   14.24 +    >>> g.fast_cliques()
   14.25 +    Fast algorithm started
   14.26 +    [frozenset([1, 2]), frozenset([3])]
   14.27 +    >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1, frozenset([1,1]): 1})
   14.28 +    >>> g.fast_cliques()
   14.29 +    Fast algorithm started
   14.30 +    [frozenset([1, 2]), frozenset([3])]
   14.31 +    >>> g = Graph(set([1,2,3,4]), {frozenset([1,2]): 0.98, frozenset([1,3]): 0.98,
   14.32 +    ... frozenset([2,3]): 0.1, frozenset([1,1]): 1})
   14.33 +    >>> g.fast_cliques()
   14.34 +    Fast algorithm started
   14.35 +    [frozenset([1, 2, 3]), frozenset([4])]
   14.36 +    >>> g.bron_kerbosh()
   14.37 +    Bron and Kerbosh algorithm started
   14.38 +    [frozenset([1, 2, 3]), frozenset([4])]
   14.39 +    >>> g.cliques()
   14.40 +    Bron and Kerbosh algorithm started
   14.41 +    [frozenset([1, 2, 3])]
   14.42 +    """
   14.43 +
   14.44 +    def __init__(self, nodes=None, lines=None):
   14.45 +        if not nodes:
   14.46 +            nodes = set()
   14.47 +        if not lines:
   14.48 +            lines = dict()
   14.49 +        self.nodes = set(nodes) # copy
   14.50 +        self.lines = {}
   14.51 +        for line, cost in lines.items():
   14.52 +            if len(line) == 2 and line.issubset(self.nodes):
   14.53 +                self.lines[line] = cost
   14.54 +
   14.55 +    @staticmethod
   14.56 +    def line(k1, k2):
   14.57 +        """ Construct object, representing line of graph """
   14.58 +        return frozenset([k1, k2])
   14.59 +
   14.60 +    def bounded(self, k1, k2):
   14.61 +        """ Return if these two nodes of the graph are bounded with line """
   14.62 +        return k1 == k2 or Graph.line(k1, k2) in self.lines
   14.63 +
   14.64 +    def count_one(self, node):
   14.65 +        """ Returns number of connections of this node """
   14.66 +        return len([node1 for node1 in self.nodes if self.bounded(node, node1)]) - 1
   14.67 +
   14.68 +    def cost_one(self, node):
   14.69 +        """ Returns sum of costs of all connections of this node """
   14.70 +        return sum([self.lines.get(Graph.line(node, node1), 0)
   14.71 +        for node1 in self.nodes if node != node1])
   14.72 +
   14.73 +    def count_all(self):
   14.74 +        """ Returns {node: number of connections of this node} """
   14.75 +        c = dict([(node, 0) for node in self.nodes])
   14.76 +        for line in self.lines:
   14.77 +            for node in line:
   14.78 +                c[node] += 1
   14.79 +        return c
   14.80 +
   14.81 +
   14.82 +    def drop_node(self, node):
   14.83 +        """ Remove node and all involved lines """
   14.84 +        for node1 in self.nodes:
   14.85 +            self.lines.pop(Graph.line(node, node1), None)
   14.86 +        self.nodes.discard(node)
   14.87 +
   14.88 +    def add_node(self, node, parent_graph):
   14.89 +        """ Add node and corresponding lines from parent_graph
   14.90 +
   14.91 +        Added lines should be contained in self graph
   14.92 +        (takes care of hanging lines)
   14.93 +        """
   14.94 +        self.nodes.add(node)
   14.95 +        for node1 in self.nodes:
   14.96 +            line = Graph.line(node, node1)
   14.97 +            if line in parent_graph.lines:
   14.98 +                self.lines[line] = parent_graph.lines[line]
   14.99 +
  14.100 +    def drop_nodes(self, nodes):
  14.101 +        """ Run drop_node for each of given nodes
  14.102 +
  14.103 +        Returns if nodes was not empty (ugly beauty)
  14.104 +        """
  14.105 +        for node in nodes:
  14.106 +            self.drop_node(node)
  14.107 +        return bool(nodes)
  14.108 +
  14.109 +    def drop_if_count(self, minsize):
  14.110 +        """ Run drop_node for each node, that has less than minsize lines """
  14.111 +        while True:
  14.112 +            if not self.drop_nodes([node for (node, count)
  14.113 +            in self.count_all().items() if count < minsize]):
  14.114 +                break
  14.115 +
  14.116 +    def bron_kerbosh(self, timeout=-1, minsize=1):
  14.117 +        """ Bron and Kerboch algorithm implementation
  14.118 +
  14.119 +        returns list of cliques
  14.120 +        clique is frozenset
  14.121 +        if timeout=-1, it means infinity
  14.122 +        if timeout has happened, raises TimeoutError
  14.123 +
  14.124 +        lava flow
  14.125 +        """
  14.126 +        print 'Bron and Kerbosh algorithm started'
  14.127 +        cliques = []
  14.128 +
  14.129 +        depth = 0
  14.130 +        list_candidates = [copy(self.nodes)]
  14.131 +        list_used = [set()]
  14.132 +        compsub = []
  14.133 +
  14.134 +        start_time = datetime.now()
  14.135 +        timeout_timedelta = timedelta(timeout)
  14.136 +
  14.137 +        while True: # ПОКА...
  14.138 +            if depth == -1:
  14.139 +                break  # ВСЕ! Все рекурсии (итерации) пройдены
  14.140 +            candidates = copy(list_candidates[depth])
  14.141 +            used = copy(list_used[depth])
  14.142 +            if not candidates: # ПОКА candidates НЕ пусто
  14.143 +                depth -= 1
  14.144 +                if compsub:
  14.145 +                    compsub.pop()
  14.146 +                continue
  14.147 +
  14.148 +            # И used НЕ содержит вершины, СОЕДИНЕННОЙ СО ВСЕМИ вершинами из candidates
  14.149 +            # (все из used НЕ соединены хотя бы с 1 из candidates)
  14.150 +            used_candidates = False
  14.151 +
  14.152 +            for used1 in used:
  14.153 +                for candidates1 in candidates:
  14.154 +                    if not self.bounded(used1, candidates1):
  14.155 +                        break
  14.156 +                else:
  14.157 +                    used_candidates = True
  14.158 +
  14.159 +            if used_candidates:
  14.160 +                depth -= 1
  14.161 +
  14.162 +                if compsub:
  14.163 +                    compsub.pop()
  14.164 +                continue
  14.165 +
  14.166 +            # Выбираем вершину v из candidates и добавляем ее в compsub
  14.167 +            v = candidates.pop()
  14.168 +            candidates.add(v)
  14.169 +            compsub.append(v)
  14.170 +            # Формируем new_candidates и new_used, удаляя из candidates и used вершины, НЕ соединенные с v
  14.171 +            # (то есть, оставляя только соединенные с v)
  14.172 +            new_candidates = set()
  14.173 +            for candidates1 in candidates:
  14.174 +                if self.bounded(candidates1, v) and candidates1 != v:
  14.175 +                    new_candidates.add(candidates1)
  14.176 +
  14.177 +            new_used = set()
  14.178 +            for used1 in used:
  14.179 +                if self.bounded(used1, v) and used1 != v:
  14.180 +                    new_used.add(used1)
  14.181 +
  14.182 +            # Удаляем v из candidates и помещаем в used
  14.183 +            list_candidates[depth].remove(v)
  14.184 +            list_used[depth].add(v)
  14.185 +            # ЕСЛИ new_candidates и new_used пусты
  14.186 +            if not new_candidates and not new_used:
  14.187 +                # compsub — клика
  14.188 +                if len(compsub) >= minsize:
  14.189 +                    cliques.append(frozenset(compsub))
  14.190 +            else:
  14.191 +                # ИНАЧЕ рекурсивно вызываем bron_kerbosh(new_candidates, new_used)
  14.192 +                depth += 1
  14.193 +
  14.194 +                # TIMEOUT check start
  14.195 +                if timeout != -1:
  14.196 +                    if datetime.now() - start_time > timeout_timedelta:
  14.197 +                        raise TimeoutError
  14.198 +                # TIMEOUT check end
  14.199 +
  14.200 +                if depth >= len(list_candidates):
  14.201 +                    list_candidates.append(set())
  14.202 +                    list_used.append(set())
  14.203 +
  14.204 +                list_candidates[depth] = copy(new_candidates)
  14.205 +                list_used[depth] = copy(new_used)
  14.206 +
  14.207 +                continue
  14.208 +
  14.209 +            # Удаляем v из compsub
  14.210 +            if compsub:
  14.211 +                compsub.pop()
  14.212 +
  14.213 +        return cliques
  14.214 +
  14.215 +
  14.216 +    def fast_cliques(self, minsize=1):
  14.217 +        """ returns list of cliques
  14.218 +
  14.219 +        clique is frozenset
  14.220 +        """
  14.221 +        print 'Fast algorithm started'
  14.222 +        cliques = []
  14.223 +
  14.224 +        while True:
  14.225 +            graph = Graph(self.nodes, self.lines)
  14.226 +            for clique in cliques:
  14.227 +                graph.drop_nodes(clique)
  14.228 +            if not graph.nodes:
  14.229 +                break
  14.230 +
  14.231 +            while True:
  14.232 +                # drop nodes while it is possible
  14.233 +                if len(graph.nodes) == 1:
  14.234 +                    break
  14.235 +                c = graph.count_all()
  14.236 +                min_count = min(c.values())
  14.237 +                bad_nodes = [node for (node, count) in c.items() if count == min_count]
  14.238 +                if len(bad_nodes) == len(graph.nodes) and min_count != 0:
  14.239 +                    break
  14.240 +
  14.241 +                costs = dict([(node, graph.cost_one(node)) for node in bad_nodes])
  14.242 +                min_cost = min(costs.values())
  14.243 +                for node, cost in costs.items():
  14.244 +                    if cost == min_cost:
  14.245 +                        graph.drop_node(node)
  14.246 +                        break
  14.247 +
  14.248 +            while True:
  14.249 +                # add nodes while it is possible
  14.250 +                candidats = {}
  14.251 +                for node in self.nodes:
  14.252 +                    c = len([i for i in graph.nodes if self.bounded(node, i)])
  14.253 +                    if c == len(self.nodes):
  14.254 +                        graph1 = Graph(graph.nodes, graph.lines)
  14.255 +                        graph1.add_node(node, self)
  14.256 +                        candidats[node] = graph1.cost_one(node)
  14.257 +                if not candidats:
  14.258 +                    break
  14.259 +
  14.260 +                max_cost = max(candidats.values())
  14.261 +                node = [node for (node, cost) in candidats.items() if cost == max_cost][0]
  14.262 +                graph.add_node(node, self)
  14.263 +
  14.264 +            cliques.append(frozenset(graph.nodes))
  14.265 +
  14.266 +        return cliques
  14.267 +
  14.268 +
  14.269 +    def cliques(self, timeout=-1, minsize=1):
  14.270 +        """ returns length-sorted list of cliques
  14.271 +
  14.272 +        clique is frozenset
  14.273 +
  14.274 +        can change self!
  14.275 +
  14.276 +        try to execute bron_kerbosh
  14.277 +        if it raises TimeoutError, executes fast_cliques
  14.278 +        """
  14.279 +
  14.280 +        self.drop_if_count(minsize)
  14.281 +
  14.282 +        try:
  14.283 +            cliques = self.bron_kerbosh(timeout, minsize)
  14.284 +            cliques.sort(key=lambda clique: len(clique), reverse=True)
  14.285 +        except TimeoutError:
  14.286 +            cliques = self.fast_cliques(minsize)
  14.287 +        return cliques
  14.288 +
  14.289 +if __name__ == "__main__":
  14.290 +    import doctest
  14.291 +    doctest.testmod()
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/allpy/pdb.py	Tue Jan 25 16:03:00 2011 +0300
    15.3 @@ -0,0 +1,341 @@
    15.4 +""" Functions to get pdb information from fasta id
    15.5 +and to generate fasta id from pdb information
    15.6 +
    15.7 +pdb information: code, chain, model
    15.8 +
    15.9 +TODO: same for local pdb files
   15.10 +"""
   15.11 +
   15.12 +import re
   15.13 +import os
   15.14 +import os.path
   15.15 +from tempfile import NamedTemporaryFile
   15.16 +import urllib2
   15.17 +
   15.18 +from Bio.PDB import PDBParser
   15.19 +from Bio.PDB import Superimposer, CaPPBuilder, PDBIO
   15.20 +from Bio.PDB.DSSP import make_dssp_dict
   15.21 +
   15.22 +import base
   15.23 +from graph import Graph
   15.24 +
   15.25 +
   15.26 +# for pdb-codes
   15.27 +re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I)
   15.28 +
   15.29 +#~ # for files
   15.30 +#~ re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$")
   15.31 +
   15.32 +def std_id(pdb_id, pdb_chain, pdb_model=None):
   15.33 +    if pdb_model:
   15.34 +        return "%s_%s_%s" % \
   15.35 +        (pdb_id.lower().strip(), pdb_chain.upper().strip(), pdb_model)
   15.36 +    else:
   15.37 +        return "%s_%s" % \
   15.38 +        (pdb_id.lower().strip(), pdb_chain.upper().strip())
   15.39 +
   15.40 +def pdb_id_parse(ID):
   15.41 +    match = re1.search(ID)
   15.42 +    if not match:
   15.43 +        return None
   15.44 +    d = match.groupdict()
   15.45 +    if 'chain' not in d or not d['chain']:
   15.46 +        d['chain'] = ' '
   15.47 +    if 'model' not in d or not d['model']:
   15.48 +        d['model'] = 0
   15.49 +    return d
   15.50 +
   15.51 +
   15.52 +def get_structure(file, name):
   15.53 +    return PDBParser().get_structure(name, file)
   15.54 +
   15.55 +#~ def std_id_parse(ID):
   15.56 +    #~ """
   15.57 +    #~ Parse standard ID to pdb_code, chain and model
   15.58 +    #~ """
   15.59 +    #~ if '.ent' in ID.lower() or '.pdb' in ID.lower():
   15.60 +        #~ # it is file
   15.61 +        #~ parseO = self.re2.search(ID) # files
   15.62 +    #~ else:
   15.63 +        #~ parseO = self.re1.search(ID.lower()) # pdb codes
   15.64 +    #~ if not parseO:
   15.65 +        #~ return None
   15.66 +    #~ parse = parseO.groups()
   15.67 +    #~ if len(parse) < 2:
   15.68 +        #~ return None
   15.69 +    #~ code = parse[1]
   15.70 +    #~ chain = ''
   15.71 +    #~ model = None
   15.72 +    #~ if len(parse) >= 4:
   15.73 +        #~ chain = parse[3]
   15.74 +        #~ if chain:
   15.75 +            #~ chain = chain.upper()
   15.76 +        #~ if len(parse) >= 6:
   15.77 +            #~ if parse[5]:
   15.78 +                #~ model = parse[5]
   15.79 +    #~ return code, chain, model
   15.80 +
   15.81 +class SequenceMixin(base.Sequence):
   15.82 +    """Mixin for adding PDB data to a Sequence.
   15.83 +
   15.84 +    Please note: since this is a mixin, no objects of this class should be
   15.85 +    created. This class is intended to be subclassed together with one of
   15.86 +    Sequence classes.
   15.87 +
   15.88 +    Attributes:
   15.89 +
   15.90 +    *   pdb_chain -- Bio.PDB.Chain
   15.91 +    *   pdb_file -- file object
   15.92 +
   15.93 +    *   pdb_residues -- {Monomer: Bio.PDB.Residue}
   15.94 +    *   pdb_secstr -- {Monomer: 'Secondary structure'}
   15.95 +            Code   Secondary structure
   15.96 +            H      alpha-helix
   15.97 +            B      Isolated beta-bridge residue
   15.98 +            E      Strand
   15.99 +            G      3-10 helix
  15.100 +            I      pi-helix
  15.101 +            T      Turn
  15.102 +            S      Bend
  15.103 +            -      Other
  15.104 +
  15.105 +
  15.106 +    ?TODO: global pdb_structures
  15.107 +    """
  15.108 +
  15.109 +    def __init__(self, *args, **kw):
  15.110 +        self.pdb_chains = []
  15.111 +        self.pdb_files = {}
  15.112 +        self.pdb_residues = {}
  15.113 +        self.pdb_secstr = {}
  15.114 +
  15.115 +    def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
  15.116 +        """ Reads Pdb chain from file
  15.117 +
  15.118 +        and align each Monomer with PDB.Residue (TODO)
  15.119 +        """
  15.120 +        name = std_id(pdb_id, pdb_chain, pdb_model)
  15.121 +        structure = get_structure(pdb_file, name)
  15.122 +        chain = structure[pdb_model][pdb_chain]
  15.123 +        self.pdb_chains.append(chain)
  15.124 +        self.pdb_residues[chain] = {}
  15.125 +        self.pdb_secstr[chain] = {}
  15.126 +        pdb_sequence = Sequence.from_pdb_chain(chain)
  15.127 +        a = alignment.Alignment.from_sequences(self, pdb_sequence)
  15.128 +        a.muscle_align()
  15.129 +        for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):
  15.130 +            if pdb_sequence.pdb_has(chain, pdb_monomer):
  15.131 +                residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
  15.132 +                self.pdb_residues[chain][monomer] = residue
  15.133 +        self.pdb_files[chain] = pdb_file
  15.134 +
  15.135 +    def pdb_unload(self):
  15.136 +        """ Delete all pdb-connected links """
  15.137 +        #~ gc.get_referrers(self.pdb_chains[0])
  15.138 +        self.pdb_chains = []
  15.139 +        self.pdb_residues = {}
  15.140 +        self.pdb_secstr = {} # FIXME
  15.141 +        self.pdb_files = {} # FIXME
  15.142 +
  15.143 +    @staticmethod
  15.144 +    def from_pdb_chain(chain):
  15.145 +        """ Returns Sequence with Monomers with link to Bio.PDB.Residue
  15.146 +
  15.147 +        chain is Bio.PDB.Chain
  15.148 +        """
  15.149 +        cappbuilder = CaPPBuilder()
  15.150 +        peptides = cappbuilder.build_peptides(chain)
  15.151 +        sequence = Sequence()
  15.152 +        sequence.pdb_chains = [chain]
  15.153 +        sequence.pdb_residues[chain] = {}
  15.154 +        sequence.pdb_secstr[chain] = {}
  15.155 +        for peptide in peptides:
  15.156 +            for ca_atom in peptide.get_ca_list():
  15.157 +                residue = ca_atom.get_parent()
  15.158 +                monomer = AminoAcidType.from_pdb_residue(residue).instance()
  15.159 +                sequence.pdb_residues[chain][monomer] = residue
  15.160 +                sequence.monomers.append(monomer)
  15.161 +        return sequence
  15.162 +
  15.163 +    def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):
  15.164 +        """ Adds pdb information to each monomer
  15.165 +
  15.166 +        Returns if information has been successfully added
  15.167 +        TODO: conformity_file
  15.168 +
  15.169 +        id-format lava flow
  15.170 +        """
  15.171 +        if not conformity_info:
  15.172 +            path = os.path.join(pdb_directory, self.name)
  15.173 +            if os.path.exists(path) and os.path.getsize(path):
  15.174 +                match = pdb_id_parse(self.name)
  15.175 +                self.pdb_chain_add(open(path), match['code'],
  15.176 +                match['chain'], match['model'])
  15.177 +            else:
  15.178 +                match = pdb_id_parse(self.name)
  15.179 +                if match:
  15.180 +                    code = match['code']
  15.181 +                    pdb_filename = config.pdb_dir % code
  15.182 +                    if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):
  15.183 +                        url = config.pdb_url % code
  15.184 +                        print "Download %s" % url
  15.185 +                        pdb_file = open(pdb_filename, 'w')
  15.186 +                        data = urllib2.urlopen(url).read()
  15.187 +                        pdb_file.write(data)
  15.188 +                        pdb_file.close()
  15.189 +                        print "Save %s" % pdb_filename
  15.190 +                    pdb_file = open(pdb_filename)
  15.191 +                    self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])
  15.192 +
  15.193 +    def pdb_save(self, out_filename, pdb_chain):
  15.194 +        """ Saves pdb_chain to out_file """
  15.195 +        class GlySelect(Select):
  15.196 +            def accept_chain(self, chain):
  15.197 +                if chain == pdb_chain:
  15.198 +                    return 1
  15.199 +                else:
  15.200 +                    return 0
  15.201 +        io = PDBIO()
  15.202 +        structure = chain.get_parent()
  15.203 +        io.set_structure(structure)
  15.204 +        io.save(out_filename, GlySelect())
  15.205 +
  15.206 +
  15.207 +    def pdb_add_sec_str(self, pdb_chain):
  15.208 +        """ Add secondary structure data """
  15.209 +        tmp_file = NamedTemporaryFile(delete=False)
  15.210 +        tmp_file.close()
  15.211 +        pdb_file = self.pdb_files[pdb_chain].name
  15.212 +        os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
  15.213 +        dssp, keys = make_dssp_dict(tmp_file.name)
  15.214 +        for monomer in self.monomers:
  15.215 +            if self.pdb_has(pdb_chain, monomer):
  15.216 +                residue = self.pdb_residues[pdb_chain][monomer]
  15.217 +                try:
  15.218 +                    d = dssp[(pdb_chain.get_id(), residue.get_id())]
  15.219 +                    self.pdb_secstr[pdb_chain][monomer] = d[1]
  15.220 +                except:
  15.221 +                    print "No dssp information about %s at %s" % (monomer, pdb_chain)
  15.222 +        os.unlink(tmp_file.name)
  15.223 +
  15.224 +    def pdb_has(self, chain, monomer):
  15.225 +        return chain in self.pdb_residues and monomer in self.pdb_residues[chain]
  15.226 +
  15.227 +    def secstr_has(self, chain, monomer):
  15.228 +        return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]
  15.229 +
  15.230 +
  15.231 +class AlignmentMixin(base.Alignment):
  15.232 +    """Mixin to add 3D properties to alignments.
  15.233 +
  15.234 +    Please note: since this is a mixin, no objects of this class should be
  15.235 +    created. This class is intended to be subclassed together with one of
  15.236 +    Alignment classes.
  15.237 +    """
  15.238 +
  15.239 +    def secstr(self, sequence, pdb_chain, gap='-'):
  15.240 +        """ Returns string representing secondary structure """
  15.241 +        return ''.join([
  15.242 +        (sequence.pdb_secstr[pdb_chain][m] if sequence.secstr_has(pdb_chain, m) else gap)
  15.243 +        for m in self.body[sequence]])
  15.244 +
  15.245 +class BlockMixin(base.Block):
  15.246 +    """Mixin to add 3D properties to blocks.
  15.247 +
  15.248 +    Please note: since this is a mixin, no objects of this class should be
  15.249 +    created. This class is intended to be subclassed together with one of
  15.250 +    Block classes.
  15.251 +    """
  15.252 +
  15.253 +    def geometrical_cores(self, max_delta=config.delta,
  15.254 +    timeout=config.timeout, minsize=config.minsize,
  15.255 +    ac_new_atoms=config.ac_new_atoms,
  15.256 +    ac_count=config.ac_count):
  15.257 +        """ Returns length-sorted list of blocks, representing GCs
  15.258 +
  15.259 +        * max_delta -- threshold of distance spreading
  15.260 +        * timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
  15.261 +        * minsize -- min size of each core
  15.262 +        * ac_new_atoms -- min part of new atoms in new alternative core
  15.263 +            current GC is compared with each of already selected GCs; if
  15.264 +            difference is less than ac_new_atoms, current GC is skipped
  15.265 +            difference = part of new atoms in current core
  15.266 +        * ac_count -- max number of cores (including main core)
  15.267 +            -1 means infinity
  15.268 +
  15.269 +        If more than one pdb chain for some sequence provided, consider all of them
  15.270 +        cost is calculated as 1 / (delta + 1)
  15.271 +
  15.272 +            delta in [0, +inf) => cost in (0, 1]
  15.273 +        """
  15.274 +        nodes = self.positions
  15.275 +        lines = {}
  15.276 +        for i in self.positions:
  15.277 +            for j in self.positions:
  15.278 +                if i < j:
  15.279 +                    distances = []
  15.280 +                    for sequence in self.sequences:
  15.281 +                        for chain in sequence.pdb_chains:
  15.282 +                            m1 = self.alignment.body[sequence][i]
  15.283 +                            m2 = self.alignment.body[sequence][j]
  15.284 +                            if m1 and m2:
  15.285 +                                r1 = sequence.pdb_residues[chain][m1]
  15.286 +                                r2 = sequence.pdb_residues[chain][m2]
  15.287 +                                ca1 = r1['CA']
  15.288 +                                ca2 = r2['CA']
  15.289 +                                d = ca1 - ca2 # Bio.PDB feature
  15.290 +                                distances.append(d)
  15.291 +                    if len(distances) >= 2:
  15.292 +                        delta = max(distances) - min(distances)
  15.293 +                        if delta <= max_delta:
  15.294 +                            lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
  15.295 +        graph = Graph(nodes, lines)
  15.296 +        cliques = graph.cliques(timeout=timeout, minsize=minsize)
  15.297 +        GCs = []
  15.298 +        for clique in cliques:
  15.299 +            for GC in GCs:
  15.300 +                if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
  15.301 +                    break
  15.302 +
  15.303 +    def ca_atoms(self, sequence, pdb_chain):
  15.304 +        """ Iterates Ca-atom of monomers of this sequence from this block  """
  15.305 +        return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers())
  15.306 +
  15.307 +    def sequences_chains(self):
  15.308 +        """ Iterates pairs (sequence, chain) """
  15.309 +        for sequence in self.alignment.sequences:
  15.310 +            if sequence in self.sequences:
  15.311 +                for chain in sequence.pdb_chains:
  15.312 +                    yield (sequence, chain)
  15.313 +
  15.314 +    def superimpose(self):
  15.315 +        """ Superimpose all pdb_chains in this block """
  15.316 +        sequences_chains = list(self.sequences_chains())
  15.317 +        if len(sequences_chains) >= 1:
  15.318 +            sup = Superimposer()
  15.319 +            fixed_sequence, fixed_chain = sequences_chains.pop()
  15.320 +            fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)
  15.321 +            for sequence, chain in sequences_chains:
  15.322 +                moving_atoms =  self.ca_atoms(sequence, chain)
  15.323 +                sup.set_atoms(fixed_atoms, moving_atoms)
  15.324 +                # Apply rotation/translation to the moving atoms
  15.325 +                sup.apply(moving_atoms)
  15.326 +
  15.327 +    def pdb_save(self, out_file):
  15.328 +        """ Save all sequences
  15.329 +
  15.330 +        Returns {(sequence, chain): CHAIN}
  15.331 +        CHAIN is chain letter in new file
  15.332 +        """
  15.333 +        tmp_file = NamedTemporaryFile(delete=False)
  15.334 +        tmp_file.close()
  15.335 +
  15.336 +        for sequence, chain in self.sequences_chains():
  15.337 +            sequence.pdb_save(tmp_file.name, chain)
  15.338 +            # TODO: read from tmp_file.name
  15.339 +            # change CHAIN
  15.340 +            # add to out_file
  15.341 +
  15.342 +        os.unlink(NamedTemporaryFile)
  15.343 +
  15.344 +# vim: set ts=4 sts=4 sw=4 et:
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/allpy/protein.py	Tue Jan 25 16:03:00 2011 +0300
    16.3 @@ -0,0 +1,53 @@
    16.4 +import base
    16.5 +import data.codes
    16.6 +
    16.7 +class Monomer(base.Monomer):
    16.8 +    """Protein monomers: aminoacids."""
    16.9 +    type = 'protein'
   16.10 +    by_code1 = {}
   16.11 +    by_code3 = {}
   16.12 +    by_name = {}
   16.13 +Monomer._initialize(data.codes.protein)
   16.14 +
   16.15 +class Protein(list):
   16.16 +    """User defined protein; list of protein_sequences."""
   16.17 +    pass
   16.18 +
   16.19 +class Sequence(base.Sequence):
   16.20 +    monomer_type = Monomer
   16.21 +
   16.22 +class Alignment(base.Alignment):
   16.23 +    sequence_type = Sequence
   16.24 +
   16.25 +    def muscle_align(self):
   16.26 +        """ Simply align this alignment using sequences (muscle)
   16.27 +
   16.28 +        uses old Monomers and Sequences objects
   16.29 +        """
   16.30 +        tmp_file = NamedTemporaryFile(delete=False)
   16.31 +        self.save_fasta(tmp_file)
   16.32 +        tmp_file.close()
   16.33 +        os.system("muscle -in %(tmp)s -out %(tmp)s" % {'tmp': tmp_file.name})
   16.34 +        sequences, body = Alignment.from_file(open(tmp_file.name))
   16.35 +        for sequence in self.sequences:
   16.36 +            try:
   16.37 +                new_sequence = [i for i in sequences if sequence==i][0]
   16.38 +            except:
   16.39 +                raise Exception("Align: Cann't find sequence %s in muscle output" % \
   16.40 +                sequence.name)
   16.41 +            old_monomers = iter(sequence.monomers)
   16.42 +            self.body[sequence] = []
   16.43 +            for monomer in body[new_sequence]:
   16.44 +                if not monomer:
   16.45 +                    self.body[sequence].append(monomer)
   16.46 +                else:
   16.47 +                    old_monomer = old_monomers.next()
   16.48 +                    if monomer != old_monomer:
   16.49 +                        raise Exception("Align: alignment errors")
   16.50 +                    self.body[sequence].append(old_monomer)
   16.51 +        os.unlink(tmp_file.name)
   16.52 +
   16.53 +class Block(Alignment, base.Block):
   16.54 +    pass
   16.55 +
   16.56 +# vim: set ts=4 sts=4 sw=4 et:
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/allpy/rna.py	Tue Jan 25 16:03:00 2011 +0300
    17.3 @@ -0,0 +1,21 @@
    17.4 +import base
    17.5 +import data.codes
    17.6 +
    17.7 +class Monomer(base.Monomer):
    17.8 +    """RNA monomers: nucleotides."""
    17.9 +    type = 'rna'
   17.10 +    by_code1 = {}
   17.11 +    by_code3 = {}
   17.12 +    by_name = {}
   17.13 +Monomer._initialize(data.codes.rna)
   17.14 +
   17.15 +class Sequence(base.Sequence):
   17.16 +    monomer_type = Monomer
   17.17 +
   17.18 +class Alignment(base.Alignment):
   17.19 +    sequence_type = Sequence
   17.20 +
   17.21 +class Block(Alignment, base.Block):
   17.22 +    pass
   17.23 +
   17.24 +# vim: set ts=4 sts=4 sw=4 et:
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/allpy/util.py	Tue Jan 25 16:03:00 2011 +0300
    18.3 @@ -0,0 +1,26 @@
    18.4 +"""Miscellaneous utilities.
    18.5 +"""
    18.6 +
    18.7 +def unzip(seq):
    18.8 +    """The oppozite of zip() builtin."""
    18.9 +    a, b = [], []
   18.10 +    for x, y in seq:
   18.11 +        a.append(x)
   18.12 +        b.append(y)
   18.13 +    return a, b
   18.14 +
   18.15 +def remove_each(string, substrings):
   18.16 +    """Remove each of substrings from string."""
   18.17 +    for sub in substrings:
   18.18 +        string = string.replace(sub, "")
   18.19 +    return string
   18.20 +
   18.21 +class UserDict(dict):
   18.22 +    """Clone of dict that user may add attributes to."""
   18.23 +    pass
   18.24 +
   18.25 +class UserList(list):
   18.26 +    """Clone of list that user may add attributes to."""
   18.27 +    pass
   18.28 +
   18.29 +# vim: set et ts=4 sts=4 sw=4:
    19.1 --- a/blocks3d/AlignmentSeq.py	Mon Jan 24 21:40:10 2011 +0300
    19.2 +++ b/blocks3d/AlignmentSeq.py	Tue Jan 25 16:03:00 2011 +0300
    19.3 @@ -16,7 +16,7 @@
    19.4  l=AlignmentSeq(seq_in)
    19.5  for t in l.new_sequences:
    19.6      print t
    19.7 -    
    19.8 +
    19.9  """
   19.10  
   19.11  
   19.12 @@ -44,7 +44,7 @@
   19.13  
   19.14          self.common - ?????? [???????] => [?????? ????, ?????? ??????? ?????]
   19.15          """
   19.16 -        
   19.17 +
   19.18  
   19.19          self.old_sequences = []
   19.20          self.new_sequences = []
   19.21 @@ -56,20 +56,20 @@
   19.22  
   19.23          self.common=[]
   19.24  
   19.25 -        
   19.26 +
   19.27          for i in xrange(0,len(self.old_sequences)):
   19.28               self.unite(i)
   19.29  
   19.30  
   19.31          for i in xrange(0,len(self.old_sequences)):
   19.32 -             self.lining(i)          
   19.33 -##        
   19.34 +             self.lining(i)
   19.35 +##
   19.36  ##        return
   19.37  ##
   19.38  ##        self.correct()
   19.39 -##        
   19.40  ##
   19.41 -##        
   19.42 +##
   19.43 +##
   19.44  ##
   19.45  ##        for str1 in self.new_sequences:
   19.46  ##            self.connections.append(self.recount(str1))
   19.47 @@ -79,7 +79,7 @@
   19.48  
   19.49  
   19.50  
   19.51 -   
   19.52 +
   19.53      def cost(self,a1,a2):
   19.54  
   19.55          """
   19.56 @@ -90,7 +90,7 @@
   19.57  
   19.58          global matrix
   19.59          global gaps
   19.60 -        
   19.61 +
   19.62          a1=a1.upper()
   19.63          a2=a2.upper()
   19.64  
   19.65 @@ -99,8 +99,6 @@
   19.66                  return matrix[a1][a2]
   19.67  
   19.68          return gaps[0]
   19.69 -            
   19.70 -        
   19.71  
   19.72  
   19.73  
   19.74 @@ -108,7 +106,9 @@
   19.75  
   19.76  
   19.77  
   19.78 -   
   19.79 +
   19.80 +
   19.81 +
   19.82      def gap_cost(self,int1):
   19.83          """
   19.84          ??????? ????????? ????? ???????????? ????? ?
   19.85 @@ -118,13 +118,13 @@
   19.86  
   19.87          global gaps
   19.88  
   19.89 -        
   19.90 +
   19.91          if (int1 >= len(gaps)):
   19.92              return gaps[(len(gaps)-1)]
   19.93          else:
   19.94              return gaps[int1]
   19.95  
   19.96 -        
   19.97 +
   19.98  
   19.99  
  19.100  
  19.101 @@ -151,7 +151,7 @@
  19.102          str1=self.old_sequences[chainN]
  19.103          len1=len(str1)
  19.104  
  19.105 -        
  19.106 +
  19.107  
  19.108  
  19.109          if (not self.common):
  19.110 @@ -159,10 +159,10 @@
  19.111              while (i<len1):
  19.112                  aminoacids=[str1[i]]
  19.113                  chains=[chainN]
  19.114 -                
  19.115 +
  19.116                  self.common.append([aminoacids,chains])
  19.117                  i+=1
  19.118 -                
  19.119 +
  19.120              return
  19.121  
  19.122  
  19.123 @@ -188,7 +188,7 @@
  19.124                  tip_from[i].append(0)
  19.125  
  19.126  
  19.127 -##        
  19.128 +##
  19.129  ##        for i in xrange(1,len1+1):
  19.130  ##            d[i][0] =  0
  19.131  ##            already_gaps[i][0] = [0,1+i]
  19.132 @@ -197,7 +197,7 @@
  19.133  ##        for j in xrange(1,len2+1):
  19.134  ##            d[0][j] =  0
  19.135  ##            already_gaps[0][j] = [1+j,0]
  19.136 -##        
  19.137 +##
  19.138  
  19.139  
  19.140  
  19.141 @@ -212,12 +212,12 @@
  19.142                      costs.append(self.cost(str1[i-1],A))
  19.143                  cost = max(costs)
  19.144  
  19.145 -                
  19.146 +
  19.147                  insertion = d[i-1][j]
  19.148                  if (j != len2): # ?????????? ???
  19.149                      insertion += self.gap_cost(already_gaps[i-1][j][1])
  19.150  
  19.151 -                    
  19.152 +
  19.153                  deletion  = d[i][j-1]
  19.154                  if (i != len1): # ?????????? ???
  19.155                      deletion += self.gap_cost(already_gaps[i][j-1][0])
  19.156 @@ -229,32 +229,32 @@
  19.157                  d[i][j] = max_way
  19.158  
  19.159  
  19.160 -                                 
  19.161 -            
  19.162 -                
  19.163 +
  19.164 +
  19.165 +
  19.166                  if (max_way==substitution):
  19.167 -                    tip=3   
  19.168 +                    tip=3
  19.169                  if (max_way==deletion):
  19.170 -                    tip=2                
  19.171 +                    tip=2
  19.172                  if (max_way==insertion):
  19.173                      tip=1
  19.174 - 
  19.175 +
  19.176  
  19.177  
  19.178                  if (tip==1):                             # insertion
  19.179                      already_gaps[i][j]=[0, (already_gaps[i-1][j][1]+1) ]
  19.180  
  19.181 -                    
  19.182 +
  19.183                  if (tip==2):                             # deletion
  19.184                      already_gaps[i][j]=[ (already_gaps[i][j-1][0]+1), 0 ]
  19.185  
  19.186                  if (tip==3):                             # substitution
  19.187                      already_gaps[i][j]=[ 0, 0 ]
  19.188 -                
  19.189 +
  19.190                  tip_from[i][j]=tip
  19.191  
  19.192  
  19.193 -##        
  19.194 +##
  19.195  ##        for d1 in d:
  19.196  ##            for d11 in d1:
  19.197  ##                print d11,
  19.198 @@ -262,24 +262,24 @@
  19.199  ##
  19.200  ##
  19.201  ##
  19.202 -##                
  19.203 +##
  19.204  ##        for d1 in tip_from:
  19.205  ##            for d11 in d1:
  19.206  ##                print d11,
  19.207  ##            print
  19.208  ##
  19.209 -##        
  19.210 +##
  19.211  ##
  19.212  ##        print insertion
  19.213  ##        print deletion
  19.214  ##        print substitution
  19.215  ##
  19.216  ##
  19.217 -##        
  19.218 +##
  19.219  ##        print already_gaps
  19.220 -##        
  19.221 +##
  19.222  
  19.223 -        
  19.224 +
  19.225  
  19.226          i=len1
  19.227          j=len2
  19.228 @@ -288,43 +288,43 @@
  19.229  
  19.230          while (i>0 or j>0):
  19.231              tip=tip_from[i][j]
  19.232 -            
  19.233 +
  19.234              if tip==1 or (j==0 and i>0):
  19.235  
  19.236                  aminoacids=[(str1[i-1])]
  19.237                  chains=[chainN]
  19.238 -                
  19.239 +
  19.240                  common.append([aminoacids,chains])
  19.241 -                
  19.242 +
  19.243                  i-=1
  19.244  
  19.245  
  19.246 -                
  19.247 +
  19.248              if tip==2 or (i==0 and j>0):
  19.249 -                
  19.250 +
  19.251                  common.append(self.common[j-1])
  19.252                  j-=1
  19.253  
  19.254 -                
  19.255 +
  19.256              if (tip==3):
  19.257 -                                
  19.258 +
  19.259                  chains=self.common[j-1][1]
  19.260                  chains.append(chainN)
  19.261 -                
  19.262 +
  19.263                  aminoacids=self.common[j-1][0]
  19.264 -                
  19.265 +
  19.266                  if (not aminoacids.count(str1[i-1])):
  19.267                      aminoacids.append(str1[i-1])
  19.268  
  19.269                  common.append([aminoacids,chains])
  19.270 -                    
  19.271 +
  19.272                  i-=1
  19.273                  j-=1
  19.274  
  19.275 -          
  19.276 -            
  19.277 +
  19.278 +
  19.279          common.reverse()
  19.280 -        
  19.281 +
  19.282          self.common=common
  19.283  
  19.284  
  19.285 @@ -356,12 +356,12 @@
  19.286          ????? ??????? ????? ??????????? ??????????????????
  19.287          ? self.new_sequences
  19.288  
  19.289 -        chainN - ????? ????    
  19.290 +        chainN - ????? ????
  19.291          """
  19.292  
  19.293          str1=self.old_sequences[chainN]
  19.294          len1=len(str1)
  19.295 -        
  19.296 +
  19.297          len2=len(self.common)
  19.298  
  19.299  
  19.300 @@ -390,28 +390,28 @@
  19.301  ##    def correct(self):
  19.302  ##
  19.303  ##        new_sequences=[]
  19.304 -##        
  19.305 +##
  19.306  ##        all_l = len(self.new_sequences[0]) # ?????????? ?? ???? ???????????????????
  19.307  ##        for str1 in self.new_sequences:
  19.308  ##            all_l = min (all_l,len(str1))
  19.309  ##
  19.310  ##
  19.311 -##        
  19.312 +##
  19.313  ##        i=0
  19.314  ##        while (i < all_l):
  19.315 -##            
  19.316 +##
  19.317  ##            if (i==0):
  19.318  ##                for str1 in self.new_sequences:
  19.319  ##                    new_sequences.append(str1[0])
  19.320  ##                i+=1
  19.321  ##                continue
  19.322  ##
  19.323 -##            
  19.324 +##
  19.325  ##            all_gaps=1
  19.326  ##            for str1 in self.new_sequences:
  19.327  ##                if (str1[i]!='-'):
  19.328  ##                    all_gaps=0
  19.329 -##                    
  19.330 +##
  19.331  ##            if (all_gaps==1):
  19.332  ##                i+=1
  19.333  ##                continue
  19.334 @@ -424,10 +424,10 @@
  19.335  ##                if (str1[i]!='-' and new_sequences[j][-1]!='-'):
  19.336  ##                    sovmest=0
  19.337  ##                    break
  19.338 -##                    
  19.339 -##                   
  19.340  ##
  19.341 -##                           
  19.342 +##
  19.343 +##
  19.344 +##
  19.345  ##            j=-1
  19.346  ##            for str1 in self.new_sequences:
  19.347  ##                j+=1
  19.348 @@ -437,7 +437,7 @@
  19.349  ##                        new_sequences[j] = new_sequences[j][:-1] + str1[i]
  19.350  ##                else:
  19.351  ##                    new_sequences[j] = new_sequences[j] + str1[i]
  19.352 -##            
  19.353 +##
  19.354  ##
  19.355  ##
  19.356  ##            i+=1
  19.357 @@ -446,7 +446,7 @@
  19.358  ##        self.new_sequences = new_sequences
  19.359  
  19.360  
  19.361 -        
  19.362 +
  19.363  
  19.364  
  19.365  
  19.366 @@ -467,7 +467,7 @@
  19.367  ##    """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
  19.368  ##    def recount(self,str1):
  19.369  ##        spisok={}
  19.370 -##                        
  19.371 +##
  19.372  ##        len1=len(str1)
  19.373  ##        old_nomer = 0
  19.374  ##
  19.375 @@ -487,4 +487,4 @@
  19.376  
  19.377  
  19.378  
  19.379 -            
  19.380 +
    20.1 --- a/blocks3d/Blocks3D.py	Mon Jan 24 21:40:10 2011 +0300
    20.2 +++ b/blocks3d/Blocks3D.py	Tue Jan 25 16:03:00 2011 +0300
    20.3 @@ -23,7 +23,7 @@
    20.4  # determine html.htm path
    20.5  if hasattr(Blocks3D_config, 'html_file'):
    20.6      html_file = Blocks3D_config.html_file
    20.7 -    
    20.8 +
    20.9  else:
   20.10  
   20.11      if sys.argv[0].replace('/', '') != sys.argv[0]:
   20.12 @@ -129,8 +129,8 @@
   20.13  except:
   20.14      all_right = 0
   20.15      print "Error: bad option"
   20.16 -    
   20.17 -    
   20.18 +
   20.19 +
   20.20  
   20.21  if s:
   20.22      save = 0
   20.23 @@ -159,7 +159,7 @@
   20.24  
   20.25  #if not Super_core:
   20.26  #    Super_core = 0
   20.27 -    
   20.28 +
   20.29  #Super_core = int(Super_core)
   20.30  
   20.31  
   20.32 @@ -175,7 +175,7 @@
   20.33          print "Error: can not find input file"
   20.34  except:
   20.35      all_right = 0
   20.36 -    print "Error: can not find input file"    
   20.37 +    print "Error: can not find input file"
   20.38  
   20.39  
   20.40  if not (output_html_file):
   20.41 @@ -209,7 +209,7 @@
   20.42  if not all_right:
   20.43  
   20.44      print ' '
   20.45 -    
   20.46 +
   20.47      print 'Programm for find blocks of true alignment'
   20.48  
   20.49      print ' '
   20.50 @@ -276,14 +276,14 @@
   20.51  #if text.count("\n>") >= 2:
   20.52  if text.count("\n>") >= 1:
   20.53      # fasta
   20.54 -    
   20.55 +
   20.56      # write to temp file input fasta alignment
   20.57      open(GC_temp, 'w').write(text)
   20.58 -    
   20.59 +
   20.60  else:
   20.61  
   20.62      # not fasta
   20.63 -    
   20.64 +
   20.65      # convert input file to fasta
   20.66  
   20.67      try:
   20.68 @@ -303,7 +303,7 @@
   20.69  
   20.70  
   20.71  
   20.72 -    
   20.73 +
   20.74  
   20.75  
   20.76  
   20.77 @@ -322,9 +322,9 @@
   20.78  current_id = '' # current fasta identifier
   20.79  
   20.80  for fasta_string in fasta:
   20.81 -    
   20.82 +
   20.83      fasta_string = fasta_string.strip()
   20.84 -    
   20.85 +
   20.86      if not len(fasta_string):
   20.87          continue
   20.88  
   20.89 @@ -345,14 +345,14 @@
   20.90  # read conformity file
   20.91  
   20.92  
   20.93 -conformity_dict = {} # Key - fasta_id. Value - list [pdb_code, pdb_chain] 
   20.94 +conformity_dict = {} # Key - fasta_id. Value - list [pdb_code, pdb_chain]
   20.95  
   20.96  
   20.97  if conformity_file:
   20.98      conformity = open(conformity_file).readlines()
   20.99  
  20.100      for conformity_string in conformity:
  20.101 -        
  20.102 +
  20.103          if len(conformity_string) < 8:
  20.104              continue
  20.105  
  20.106 @@ -363,12 +363,12 @@
  20.107              exit()
  20.108  
  20.109          fasta_id = conformity_list[0]
  20.110 -        
  20.111 +
  20.112  ##        if not fasta_dict.has_key(fasta_id):
  20.113  ##            print 'Error: unknown fasta identifier "' + fasta_id + '" in conformity file'
  20.114 -##            exit()            
  20.115 +##            exit()
  20.116  
  20.117 -                                  
  20.118 +
  20.119          conformity_pdb = conformity_list[1]
  20.120  
  20.121  #        if len(conformity_pdb) != 2:
  20.122 @@ -379,7 +379,7 @@
  20.123  #            print 'Error: can not read PDB code and chain "' + conformity_list[1] + '" in conformity file'
  20.124  #            exit()
  20.125  
  20.126 -       
  20.127 +
  20.128          conformity_dict[fasta_id] = conformity_pdb
  20.129  
  20.130  
  20.131 @@ -392,16 +392,16 @@
  20.132  # ID_user[ID] = ID, entered by user
  20.133  ID_user = {}
  20.134  
  20.135 -    
  20.136 +
  20.137  for fasta_id, sequence in fasta_dict.items():
  20.138 -    
  20.139 +
  20.140      if conformity_dict.has_key(fasta_id):
  20.141          fasta_id = conformity_dict[fasta_id]
  20.142 -        
  20.143 -    
  20.144 +
  20.145 +
  20.146      pdb = B3D.from_ID(fasta_id)
  20.147 -    
  20.148 -    
  20.149 +
  20.150 +
  20.151  
  20.152      if not pdb:
  20.153          print 'Error: sequence identifier "' + fasta_id + '" in not valid'
  20.154 @@ -411,7 +411,7 @@
  20.155      pdb_code, pdb_chain, model = pdb
  20.156  
  20.157      block.append([pdb_code, pdb_chain, sequence, model])
  20.158 -    
  20.159 +
  20.160      # save ID, entered by user
  20.161      ID = B3D.to_ID(pdb_code, pdb_chain, model)
  20.162      ID_user[ID] = fasta_id
  20.163 @@ -431,8 +431,8 @@
  20.164  # run GeometricalCore_class to make core
  20.165  ##############################################
  20.166  
  20.167 -  
  20.168 -    
  20.169 +
  20.170 +
  20.171  B3D.min_width = min_width
  20.172  B3D.timeout_2 = timeout_2
  20.173  
  20.174 @@ -444,7 +444,7 @@
  20.175  B3D.timeout = timeout
  20.176  B3D.pdb_url = pdb_url
  20.177  
  20.178 -    
  20.179 +
  20.180  # blocks is object with result
  20.181  blocks = B3D.find_blocks(block)
  20.182  
  20.183 @@ -455,13 +455,13 @@
  20.184  for block in blocks:
  20.185  
  20.186      IDs_1 = []
  20.187 -    
  20.188 +
  20.189      for ID in block['IDs']:
  20.190 -    
  20.191 +
  20.192          IDs_1.append(ID_user[ID])
  20.193 -    
  20.194 +
  20.195      block['IDs'] = IDs_1
  20.196 -    
  20.197 +
  20.198      blocks_1.append(block)
  20.199  
  20.200  
  20.201 @@ -470,25 +470,25 @@
  20.202  
  20.203  if output_html_file:
  20.204      # produce HTML
  20.205 -    
  20.206 +
  20.207      t = ''
  20.208 -    
  20.209 -    
  20.210 +
  20.211 +
  20.212      t += "blocks = json('" + json.dumps(blocks_1) + "');"
  20.213      t += "fasta_dict = json('" + json.dumps(fasta_dict) + "');"
  20.214 -    
  20.215 +
  20.216  #    t += "try{blocks_init();}catch(e){}"
  20.217 -    
  20.218 -    
  20.219 -    
  20.220 +
  20.221 +
  20.222 +
  20.223      html = ''.join(open(html_file).readlines())
  20.224 -    
  20.225 +
  20.226      # insert out code instead of "self_js_text"
  20.227      html = html.replace('self_js_text', t)
  20.228 -    
  20.229 -    
  20.230 +
  20.231 +
  20.232      open(output_html_file, 'w').write(html)
  20.233 -    
  20.234 +
  20.235  
  20.236  
  20.237  
    21.1 --- a/blocks3d/Blocks3D_class.py	Mon Jan 24 21:40:10 2011 +0300
    21.2 +++ b/blocks3d/Blocks3D_class.py	Tue Jan 25 16:03:00 2011 +0300
    21.3 @@ -36,25 +36,25 @@
    21.4  
    21.5      User parameters:
    21.6  
    21.7 -    
    21.8 +
    21.9      """
   21.10  
   21.11  
   21.12  
   21.13  
   21.14 -    
   21.15 +
   21.16      def __init__(self):
   21.17  
   21.18          """
   21.19          Create new copy of class
   21.20          """
   21.21 -        
   21.22 +
   21.23          GC.__init__(self)
   21.24 -        
   21.25 +
   21.26          vars(self).update(vars(Blocks3D_config))
   21.27 -        
   21.28 -        
   21.29 -        
   21.30 +
   21.31 +
   21.32 +
   21.33  
   21.34  
   21.35  
   21.36 @@ -67,7 +67,7 @@
   21.37                  It's element - list like ["1jcc", "A", "SSNAKIDQLSSDAQ", None]
   21.38                      SSNAKIDQLSSDAQ - part of alignment block sequence
   21.39                      Last parameter - model or None
   21.40 -                    
   21.41 +
   21.42  
   21.43          Result:
   21.44              returns list of blocks
   21.45 @@ -77,7 +77,7 @@
   21.46                  end: integer
   21.47                  IDs: list of IDs
   21.48          """
   21.49 -        
   21.50 +
   21.51  #        # list of IDs
   21.52          IDs = []
   21.53  
   21.54 @@ -85,109 +85,109 @@
   21.55  
   21.56              ID = self.to_ID(pdb_code, pdb_chain, model)
   21.57              IDs.append(ID)
   21.58 -        
   21.59 +
   21.60          self.IDs = IDs
   21.61 -        
   21.62 +
   21.63  
   21.64          # build couple cores:
   21.65 -        
   21.66 +
   21.67          # CORES = <list of cores>
   21.68          # core is list of positions
   21.69  #        CORES = []
   21.70 -        
   21.71 -        
   21.72 -        
   21.73 +
   21.74 +
   21.75 +
   21.76          # couple_core_parts[ID_1][ID_2] = <list of parts>
   21.77          # part = {'core': core_number, 'start': start_posotion, 'end': end_position}
   21.78          couple_core_parts = {}
   21.79 -        
   21.80 -        
   21.81 -            
   21.82 +
   21.83 +
   21.84 +
   21.85  
   21.86          for protein_1 in alignment:
   21.87              pdb_code_1, pdb_chain_1, seq_part_1, model_1 = protein_1
   21.88              ID_1 = self.to_ID(pdb_code_1, pdb_chain_1, model_1)
   21.89  
   21.90              couple_core_parts[ID_1] = {}
   21.91 -            
   21.92 +
   21.93              for protein_2 in alignment:
   21.94                  pdb_code_2, pdb_chain_2, seq_part_2, model_2 = protein_2
   21.95                  ID_2 = self.to_ID(pdb_code_2, pdb_chain_2, model_2)
   21.96 -                
   21.97 +
   21.98                  if ID_1 == ID_2:
   21.99                      break
  21.100 -                
  21.101 -                
  21.102 +
  21.103 +
  21.104                  cores = self.check_block([protein_1, protein_2])
  21.105 -                
  21.106 +
  21.107  #                print ' '
  21.108 -                
  21.109 +
  21.110  #                print cores
  21.111 -                
  21.112 -                
  21.113 +
  21.114 +
  21.115                  couple_core_parts[ID_1][ID_2] = []
  21.116 -                
  21.117 +
  21.118                  all_cores = [] # list of positions of all cores
  21.119 -                
  21.120 +
  21.121                  for core in cores:
  21.122 -                
  21.123 +
  21.124                      parts = self.splice_to_continued(core, (ID_1, ID_2))
  21.125 -                    
  21.126 +
  21.127                      for part in parts:
  21.128 -                        
  21.129 +
  21.130                          if len(part) >= self.min_width:
  21.131 -                        
  21.132 +
  21.133                              all_cores.extend(clon(part))
  21.134 -                    
  21.135 +
  21.136                  all_cores = list(set(all_cores)) # unical elements
  21.137 -                
  21.138 +
  21.139                  all_cores.sort()
  21.140 -                    
  21.141 -                
  21.142 +
  21.143 +
  21.144                  parts = self.splice_to_continued(all_cores, (ID_1, ID_2))
  21.145 -                    
  21.146 +
  21.147                  for part in parts:
  21.148 -                
  21.149 +
  21.150                      if len(part) < self.min_width:
  21.151                          continue
  21.152 -                        
  21.153 +
  21.154                      couple_core_parts[ID_1][ID_2].append(clon({
  21.155                      'start': part[0], 'end': part[-1]}))
  21.156  
  21.157                  couple_core_parts[ID_2][ID_1] = couple_core_parts[ID_1][ID_2]
  21.158 -                
  21.159 +
  21.160  #                CORES.append(clon(core))
  21.161 -            
  21.162 -        
  21.163 -        
  21.164 +
  21.165 +
  21.166 +
  21.167  #        exit()
  21.168 -        
  21.169 +
  21.170          # list of elements of blocks graph
  21.171          # element is (ID, pos)
  21.172          graph_elements = []
  21.173 -        
  21.174 +
  21.175          # reverse dict to graph_elements
  21.176          graph_elements_1 = {}
  21.177 -        
  21.178 -        
  21.179 +
  21.180 +
  21.181          # graph[i][j] == 1, if these elements are connected, or i == j
  21.182          # i and j are indexes from list "graph_elements"
  21.183          graph = {}
  21.184 -        
  21.185 -        
  21.186 +
  21.187 +
  21.188          # same as graph, but include lists of cores n
  21.189  #        graph_cores = {}
  21.190 -        
  21.191 -        
  21.192 -        
  21.193 +
  21.194 +
  21.195 +
  21.196          # is_boundary[ID][pos] == 1, if pos is boundary of some part in ID
  21.197          is_boundary = {}
  21.198 -        
  21.199 -        
  21.200 -        
  21.201 +
  21.202 +
  21.203 +
  21.204          for ID_1 in IDs:
  21.205 -            
  21.206 -            
  21.207 +
  21.208 +
  21.209              # find boundaries
  21.210              is_boundary[ID_1] = {}
  21.211              for ID_2 in IDs:
  21.212 @@ -196,70 +196,70 @@
  21.213                  for part in couple_core_parts[ID_1][ID_2]:
  21.214                      is_boundary[ID_1][part['start']] = 1
  21.215                      is_boundary[ID_1][part['end']] = 1
  21.216 -                
  21.217  
  21.218 -        
  21.219 -        
  21.220 +
  21.221 +
  21.222 +
  21.223              for ID_2 in IDs:
  21.224 -                
  21.225 +
  21.226                  if ID_1 == ID_2:
  21.227                      break
  21.228 -                    
  21.229 -                
  21.230 -                
  21.231 -                
  21.232 -                
  21.233 +
  21.234 +
  21.235 +
  21.236 +
  21.237 +
  21.238                  for part in couple_core_parts[ID_1][ID_2]:
  21.239 -                    
  21.240 +
  21.241                      start = part['start']
  21.242                      end = part['end']
  21.243 -                    
  21.244 +
  21.245                      # elements of graph like (ID, pos)
  21.246                      elements = []
  21.247 -                    
  21.248 +
  21.249                      for pos in xrange(start, end + 1):
  21.250 -                        
  21.251 +
  21.252                          if not is_boundary[ID_1].has_key(pos) and not is_boundary[ID_2].has_key(pos):
  21.253                              continue
  21.254 -                    
  21.255 +
  21.256                          for ID in (ID_1, ID_2):
  21.257                              if not graph_elements_1.has_key((ID, pos)):
  21.258                                  L = len(graph_elements)
  21.259                                  graph_elements_1[(ID, pos)] = L
  21.260                                  graph[L] = {}
  21.261  #                                graph_cores[L] = {}
  21.262 -                                
  21.263 +
  21.264                                  graph_elements.append((ID, pos))
  21.265 -                                
  21.266 +
  21.267  
  21.268                              elements.append(graph_elements_1[(ID, pos)])
  21.269 -                            
  21.270 -                    
  21.271 +
  21.272 +
  21.273                      for element_1 in elements:
  21.274                          for element_2 in elements:
  21.275                              graph[element_1][element_2] = 1
  21.276 -                            
  21.277 +
  21.278  #                            # add information about cores
  21.279  #                            if not graph_cores[element_1].has_key(element_2):
  21.280  #                                graph_cores[element_1][element_2] = []
  21.281  #                            graph_cores[element_1][element_2].append(part['core'])
  21.282 -                            
  21.283 -                            
  21.284 -        
  21.285 +
  21.286 +
  21.287 +
  21.288          L = len(graph_elements)
  21.289 -        
  21.290 +
  21.291          for element_1 in xrange(0, L):
  21.292              for element_2 in xrange(0, L):
  21.293                  if not graph[element_1].has_key(element_2):
  21.294                      graph[element_1][element_2] = 0
  21.295 -        
  21.296 -        
  21.297 -        
  21.298 +
  21.299 +
  21.300 +
  21.301          kliki_1 = Kliki(graph, min_size=0, timeout=self.timeout_2).kliki
  21.302 -        
  21.303 -        
  21.304 -#        
  21.305 -#        
  21.306 +
  21.307 +
  21.308 +#
  21.309 +#
  21.310  #        # graf illustration
  21.311  #        for i in graph.keys():
  21.312  #            t = ''
  21.313 @@ -269,441 +269,441 @@
  21.314  #                else:
  21.315  #                    t += ' '
  21.316  #            print t
  21.317 -#        
  21.318 -#        
  21.319 -        
  21.320 -        
  21.321 +#
  21.322 +#
  21.323 +
  21.324 +
  21.325          alignment_len = len(alignment[0][2])
  21.326 -        
  21.327 -        
  21.328 -        
  21.329 +
  21.330 +
  21.331 +
  21.332          # occupied alignment elements
  21.333          occupied = {}
  21.334 -        
  21.335 +
  21.336          for ID in IDs:
  21.337 -            
  21.338 +
  21.339              occupied[ID] = [0] * alignment_len
  21.340 -            
  21.341 +
  21.342          # 00000000
  21.343          occupied_current0 = clon(occupied)
  21.344 -        
  21.345 -        
  21.346 -        
  21.347 -        
  21.348 -        
  21.349 -        
  21.350 -        
  21.351 +
  21.352 +
  21.353 +
  21.354 +
  21.355 +
  21.356 +
  21.357 +
  21.358  #        print graph
  21.359 -        
  21.360 +
  21.361  #        print kliki_1
  21.362 -        
  21.363 -        
  21.364 +
  21.365 +
  21.366          rectangles = []
  21.367 -        
  21.368 +
  21.369          for klika in kliki_1:
  21.370 -        
  21.371 +
  21.372              if self.rectangles_type == 'out':
  21.373 -                
  21.374 -                # transitivity 
  21.375 -                
  21.376 +
  21.377 +                # transitivity
  21.378 +
  21.379                  IDs_dict = {}
  21.380                  start = alignment_len
  21.381                  end = 0
  21.382 -                
  21.383 +
  21.384                  for element in klika:
  21.385 -                    
  21.386 +
  21.387                      ID, pos = graph_elements[element]
  21.388 -                    
  21.389 +
  21.390                      IDs_dict[ID] = 1
  21.391 -                    
  21.392 +
  21.393                      start = min(start, pos)
  21.394                      end = max(end, pos)
  21.395 -                
  21.396 +
  21.397                  IDs = IDs_dict.keys()
  21.398 -                
  21.399 -                
  21.400 +
  21.401 +
  21.402                  rectangles.append({'IDs': IDs, 'start': start, 'end': end})
  21.403 -                
  21.404 +
  21.405              else:
  21.406 -                
  21.407 +
  21.408                  ID_start = {}
  21.409                  ID_end = {}
  21.410 -                
  21.411 +
  21.412                  for element in klika:
  21.413 -                    
  21.414 +
  21.415                      ID, pos = graph_elements[element]
  21.416 -                    
  21.417 -                    
  21.418 +
  21.419 +
  21.420                      if not ID_start.has_key(ID):
  21.421                          ID_start[ID] = pos
  21.422                          ID_end[ID] = pos
  21.423                      else:
  21.424                          ID_start[ID] = min(ID_start[ID], pos)
  21.425                          ID_end[ID] = max(ID_end[ID], pos)
  21.426 -                
  21.427 -                
  21.428 +
  21.429 +
  21.430                  # empty table
  21.431                  occupied_current = clon(occupied_current0)
  21.432 -                
  21.433 -                
  21.434 -                
  21.435 +
  21.436 +
  21.437 +
  21.438                  for ID, start in ID_start.items():
  21.439                      end = ID_end[ID]
  21.440 -                    
  21.441 +
  21.442                      for pos in xrange(start, end + 1):
  21.443                          occupied_current[ID][pos] = 1
  21.444 -                
  21.445 -                
  21.446 +
  21.447 +
  21.448                  rectangles.extend(self.splice_to_rect(occupied_current))
  21.449 -        
  21.450 -        
  21.451 -        
  21.452 +
  21.453 +
  21.454 +
  21.455          # sort by height and then by width
  21.456 -        
  21.457 +
  21.458          rectangles.sort(self.rectangles_compare)
  21.459 -        
  21.460 -        
  21.461 -        
  21.462 -        
  21.463 -        
  21.464 -        
  21.465 -        
  21.466 -        
  21.467 -        
  21.468 +
  21.469 +
  21.470 +
  21.471 +
  21.472 +
  21.473 +
  21.474 +
  21.475 +
  21.476 +
  21.477          rectangles_2 = []
  21.478 -        
  21.479 -        
  21.480 +
  21.481 +
  21.482          for rectangle in rectangles:
  21.483 -            
  21.484 +
  21.485              # empty table
  21.486              occupied_current = clon(occupied_current0)
  21.487 -            
  21.488 +
  21.489              for ID in rectangle['IDs']:
  21.490 -                
  21.491 +
  21.492                  for pos in xrange(rectangle['start'], rectangle['end'] + 1):
  21.493 -                
  21.494 +
  21.495                      if not occupied[ID][pos]:
  21.496 -                    
  21.497 +
  21.498                          occupied_current[ID][pos] = 1
  21.499 -            
  21.500 +
  21.501              rectangles_current = self.splice_to_rect(occupied_current)
  21.502 -            
  21.503 -            
  21.504 -            
  21.505 +
  21.506 +
  21.507 +
  21.508              rectangles_2.extend(rectangles_current)
  21.509 -            
  21.510 +
  21.511              for rectangle in rectangles_current:
  21.512 -                
  21.513 +
  21.514  #                if len(rectangle['IDs']) ==2 and rectangle['IDs'][1] == '1bw5_A_4':
  21.515  #                    print occupied_current, rectangles
  21.516 -                    
  21.517 -                    
  21.518 +
  21.519 +
  21.520                  for ID in rectangle['IDs']:
  21.521                      for pos in xrange(rectangle['start'], rectangle['end'] + 1):
  21.522                          occupied[ID][pos] = 1
  21.523 -            
  21.524 -        
  21.525 -        
  21.526 +
  21.527 +
  21.528 +
  21.529          rectangles = clon(rectangles_2)
  21.530 -        
  21.531 -        
  21.532 -        
  21.533 -        
  21.534 -        
  21.535 +
  21.536 +
  21.537 +
  21.538 +
  21.539 +
  21.540          # delete blocks, including 1 sequence
  21.541 -        
  21.542 +
  21.543          rectangles = filter(self.rectangle_filter, rectangles)
  21.544 -        
  21.545 -        
  21.546 +
  21.547 +
  21.548          return rectangles
  21.549 -            
  21.550 -        
  21.551 -        
  21.552 -#        
  21.553 -#    # transitivity 
  21.554 +
  21.555 +
  21.556 +
  21.557 +#
  21.558 +#    # transitivity
  21.559  #    def splice_to_outrect(self, occupied_current, occupied):
  21.560  #        """
  21.561  #        Splice multitude of alignment elements to rectangles
  21.562 -#        
  21.563 +#
  21.564  #        Returns:
  21.565  #            new occupied_current (out rectangle) \ occupied
  21.566 -#                
  21.567 +#
  21.568  #        """
  21.569 -#        
  21.570 +#
  21.571  #        width = len(occupied_current.values()[0])
  21.572 -#        
  21.573 +#
  21.574  #        start = width # the most right position
  21.575  #        end = 0 # the most left position
  21.576 -#        
  21.577 +#
  21.578  #        IDs_dict = {}
  21.579 -#        
  21.580 +#
  21.581  #        for ID, line in occupied_current.items():
  21.582 -#            
  21.583 +#
  21.584  #            for pos, condition in enumerate(line):
  21.585 -#                
  21.586 +#
  21.587  #                if condition:
  21.588 -#                    
  21.589 +#
  21.590  #                    IDs_dict[ID] = 1
  21.591  #                    start = min(start, pos)
  21.592  #                    end = max(end, pos)
  21.593 -#            
  21.594 +#
  21.595  #        IDs = IDs_dict.keys()
  21.596 -#        
  21.597 -#        
  21.598 -#        
  21.599 +#
  21.600 +#
  21.601 +#
  21.602  #        occupied_current_1 = {}
  21.603 -#        
  21.604 -#        
  21.605 -#        
  21.606 +#
  21.607 +#
  21.608 +#
  21.609  #        for ID in IDs:
  21.610 -#            
  21.611 +#
  21.612  #            occupied_current_1[ID] = [0] * width
  21.613 -#            
  21.614 +#
  21.615  #            for pos in xrange(start, end + 1):
  21.616  #
  21.617  #                if not occupied[ID][pos]:
  21.618  #
  21.619  #                    occupied_current_1[ID][pos] = 1
  21.620 -#            
  21.621 +#
  21.622  #        return occupied_current_1
  21.623 -#        
  21.624 -#        
  21.625 -        
  21.626 -        
  21.627 +#
  21.628 +#
  21.629 +
  21.630 +
  21.631      def rectangles_compare(self, x, y):
  21.632 -        
  21.633 +
  21.634          height_x = len(x['IDs'])
  21.635          height_y = len(y['IDs'])
  21.636 -        
  21.637 +
  21.638          if height_x > height_y:
  21.639              return -1
  21.640          if height_x < height_y:
  21.641              return 1
  21.642 -            
  21.643 +
  21.644          # same heights
  21.645 -        
  21.646 +
  21.647          width_x = x['end'] - x['start'] + 1
  21.648          width_y = y['end'] - y['start'] + 1
  21.649 -        
  21.650 +
  21.651          if width_x > width_y:
  21.652              return -1
  21.653          if width_x < width_y:
  21.654              return 1
  21.655 -        
  21.656 +
  21.657          return 0
  21.658 -    
  21.659 -    
  21.660 -    
  21.661 -    
  21.662 +
  21.663 +
  21.664 +
  21.665 +
  21.666      def rectangle_filter(self, rectangle):
  21.667 -        
  21.668 +
  21.669          height = len(rectangle['IDs'])
  21.670 -        
  21.671 +
  21.672          width = rectangle['end'] - rectangle['start'] + 1
  21.673 -        
  21.674 +
  21.675          if height >= 2 and width >= self.min_width:
  21.676              return True
  21.677          else:
  21.678              return False
  21.679 -        
  21.680 -        
  21.681 -        
  21.682 -        
  21.683 +
  21.684 +
  21.685 +
  21.686 +
  21.687      def splice_to_rect(self, occupied_current):
  21.688          """
  21.689          Splice multitude of alignment elements to rectangles
  21.690 -        
  21.691 +
  21.692          Returns list of rectangles:
  21.693              rectangle is dict:
  21.694                  'start'
  21.695                  'end'
  21.696                  'IDs' - list
  21.697 -                
  21.698 +
  21.699          """
  21.700 -        
  21.701 +
  21.702  #        occupied_current_1 = splice_to_outrect(occupied_current, occupied)
  21.703  
  21.704          occupied_current_1 = clon(occupied_current)
  21.705 -        
  21.706 +
  21.707          width = len(occupied_current_1.values()[0])
  21.708 -        
  21.709 -        
  21.710 +
  21.711 +
  21.712          rectangles = []
  21.713 -        
  21.714 +
  21.715          rectangles_count = 0
  21.716 -        
  21.717 -        
  21.718 -        while 1: 
  21.719 -            
  21.720 +
  21.721 +
  21.722 +        while 1:
  21.723 +
  21.724              excluded_IDs = {}
  21.725 -            
  21.726 +
  21.727              while 1:
  21.728 -                
  21.729 +
  21.730                  IDs_occupied = {} # is ID occupied
  21.731 -                
  21.732 +
  21.733                  height = {} # this position height
  21.734 -                
  21.735 +
  21.736                  for pos in xrange(0, width):
  21.737 -                    
  21.738 +
  21.739                      height[pos] = 0
  21.740 -                    
  21.741 +
  21.742                      for ID, e in occupied_current_1.items():
  21.743 -                        
  21.744 +
  21.745                          if excluded_IDs.has_key(ID):
  21.746                              continue
  21.747 -                        
  21.748 +
  21.749                          if  e[pos]:
  21.750                              height[pos] += 1
  21.751 -                            
  21.752 +
  21.753                              IDs_occupied[ID] = 1
  21.754 -                
  21.755 -                
  21.756 +
  21.757 +
  21.758                  max_height = len(IDs_occupied.keys())
  21.759 -                
  21.760 -                
  21.761 +
  21.762 +
  21.763                  if max_height <= 1:
  21.764                      break
  21.765 -                
  21.766 -                
  21.767 -                
  21.768 +
  21.769 +
  21.770 +
  21.771                  max_height_positions = []
  21.772 -                
  21.773 +
  21.774                  for pos in xrange(0, width):
  21.775                      if height[pos] == max_height:
  21.776                          max_height_positions.append(pos)
  21.777 -                        
  21.778 -                
  21.779 +
  21.780 +
  21.781                  parts = self.splice_to_continued(max_height_positions, IDs_occupied.keys())
  21.782  
  21.783 -                
  21.784 +
  21.785                  ok = 0
  21.786 -                
  21.787 +
  21.788                  for part in parts:
  21.789 -                    
  21.790 +
  21.791                      if len(part) >= self.min_width:
  21.792 -                        
  21.793 +
  21.794                          # It is block!
  21.795 -                        
  21.796 +
  21.797                          IDs = IDs_occupied.keys()
  21.798                          start = part[0]
  21.799                          end = part[-1]
  21.800 -                        
  21.801 -                        
  21.802 +
  21.803 +
  21.804                          rectangles.append(clon({'IDs': IDs, 'start': start, 'end': end, 'cores':[]}))
  21.805 -                        
  21.806 -                        
  21.807 +
  21.808 +
  21.809                          # exclude elements
  21.810 -                        
  21.811 +
  21.812                          for ID in IDs:
  21.813                              for pos in xrange(start, end + 1):
  21.814                                  occupied_current_1[ID][pos] = 0
  21.815 -                        
  21.816 -                        
  21.817 -                
  21.818 +
  21.819 +
  21.820 +
  21.821                          ok = 1
  21.822 -                        
  21.823 +
  21.824                  if ok:
  21.825                      continue
  21.826 -                    
  21.827 -                
  21.828 -                
  21.829 -                
  21.830 +
  21.831 +
  21.832 +
  21.833 +
  21.834                  # find the shortest sequence
  21.835 -                
  21.836 -                
  21.837 +
  21.838 +
  21.839                  max_width_dict = {}
  21.840 -                
  21.841 +
  21.842                  for ID, e in occupied_current_1.items():
  21.843 -                    
  21.844 +
  21.845                      if excluded_IDs.has_key(ID):
  21.846                          continue
  21.847 -                    
  21.848 +
  21.849                      positions = []
  21.850 -                    
  21.851 +
  21.852                      for pos in xrange(0, width):
  21.853 -                        
  21.854 +
  21.855                          if height[pos] <= 1:
  21.856                              # "bad" position
  21.857                              continue
  21.858 -                        
  21.859 +
  21.860                          if e[pos]:
  21.861                              positions.append(pos)
  21.862 -                    
  21.863 +
  21.864                      if len(positions) == 0:
  21.865                          continue
  21.866 -                        
  21.867 +
  21.868                      parts = self.splice_to_continued(positions, [ID])
  21.869 -                    
  21.870 +
  21.871                      max_part_len = 0
  21.872 -                    
  21.873 +
  21.874                      for part in parts:
  21.875                          max_part_len = max(len(part), max_part_len)
  21.876 -                    
  21.877 +
  21.878                      max_width_dict[ID] = max_part_len
  21.879 -                    
  21.880 -                
  21.881 +
  21.882 +
  21.883                  if len(max_width_dict.values()):
  21.884                      min_width = min(max_width_dict.values())
  21.885                  else:
  21.886                      break
  21.887 -                
  21.888 +
  21.889                  # exclude these sequences
  21.890 -                
  21.891 +
  21.892                  for ID, e in occupied_current_1.items():
  21.893 -                    
  21.894 +
  21.895                      if max_width_dict.has_key(ID):
  21.896                          #if max_width_dict[ID] == min_width:
  21.897                          if max_width_dict[ID] <= min_width:
  21.898 -                            
  21.899 +
  21.900                              excluded_IDs[ID] = 1
  21.901 -                            
  21.902 +
  21.903  #                            for pos in xrange(0, width):
  21.904  #                                occupied_current_1[ID][pos] = 0
  21.905  
  21.906 -            
  21.907 +
  21.908  #            print rectangles_count
  21.909 -            
  21.910 +
  21.911              if len(rectangles) == rectangles_count:
  21.912                  break
  21.913 -                
  21.914 +
  21.915              rectangles_count = len(rectangles)
  21.916 -            
  21.917 +
  21.918  #            print rectangles_count
  21.919 -            
  21.920 -                        
  21.921 +
  21.922 +
  21.923          return rectangles
  21.924 -        
  21.925 -        
  21.926 -        
  21.927 -        
  21.928 -        
  21.929 -        
  21.930 -        
  21.931 -        
  21.932 -        
  21.933 -        
  21.934 -    
  21.935 -    
  21.936 -    
  21.937 -    
  21.938 -    
  21.939 -    
  21.940 -    
  21.941 -    
  21.942 -    
  21.943 -    
  21.944 -    
  21.945 -    
  21.946 -    
  21.947 -    
  21.948 -    
  21.949 +
  21.950 +
  21.951 +
  21.952 +
  21.953 +
  21.954 +
  21.955 +
  21.956 +
  21.957 +
  21.958 +
  21.959 +
  21.960 +
  21.961 +
  21.962 +
  21.963 +
  21.964 +
  21.965 +
  21.966 +
  21.967 +
  21.968 +
  21.969 +
  21.970 +
  21.971 +
  21.972 +
  21.973 +
  21.974      def splice_to_continued(self, LIST, IDs):
  21.975          """
  21.976          LIST is a list of positions of alignment
  21.977 -        
  21.978 +
  21.979          returns list of continued lists, constituted source LIST
  21.980          """
  21.981 -        
  21.982 +
  21.983          LIST.sort()
  21.984  
  21.985  
  21.986 @@ -713,38 +713,38 @@
  21.987          for pos in LIST:
  21.988  
  21.989              if len(parts[-1]):
  21.990 -                
  21.991 +
  21.992                  if pos - parts[-1][-1] == 1:
  21.993 -                    
  21.994 +
  21.995                      parts[-1].append(pos)
  21.996 -                    
  21.997 +
  21.998                  else:
  21.999 -                    
 21.1000 +
 21.1001                      # check omited sequence
 21.1002 -                    
 21.1003 +
 21.1004                      gappes = 1 # all omited sequence is gaps
 21.1005 -                    
 21.1006 +
 21.1007                      for x in xrange(parts[-1][-1] + 1, pos):
 21.1008 -                        
 21.1009 +
 21.1010                          for ID in IDs:
 21.1011                              if self.structure1[ID][x]:
 21.1012                                  gappes = 0
 21.1013                                  break
 21.1014 -                                
 21.1015 +
 21.1016                          if gappes == 0:
 21.1017                              break
 21.1018 -                            
 21.1019 +
 21.1020                      if not gappes:
 21.1021 -                    
 21.1022 +
 21.1023                          # There are not only gapes
 21.1024                          parts.append([pos])
 21.1025 -                    
 21.1026 -            else: 
 21.1027 +
 21.1028 +            else:
 21.1029                  # first element
 21.1030 -                
 21.1031 +
 21.1032                  parts[-1].append(pos)
 21.1033 -                            
 21.1034 -        
 21.1035 +
 21.1036 +
 21.1037          return clon(parts)
 21.1038  
 21.1039  
    22.1 --- a/blocks3d/GeometricalCore_class.py	Mon Jan 24 21:40:10 2011 +0300
    22.2 +++ b/blocks3d/GeometricalCore_class.py	Tue Jan 25 16:03:00 2011 +0300
    22.3 @@ -45,14 +45,14 @@
    22.4  
    22.5  
    22.6  
    22.7 -import os # to control file existence 
    22.8 +import os # to control file existence
    22.9  
   22.10  import urllib2 # to upload PDB from Internet
   22.11  
   22.12  import re # regulas expression
   22.13  
   22.14  
   22.15 -from Kliki import Kliki # algorithm to find core(s) from graph 
   22.16 +from Kliki import Kliki # algorithm to find core(s) from graph
   22.17  
   22.18  import AAdict # dict to convert 3-N code to 1-N
   22.19  import AlignmentSeq # sequense alignment class
   22.20 @@ -73,14 +73,14 @@
   22.21  
   22.22      self.structure
   22.23          3d Coordinates of CA atoms of each structure
   22.24 -        
   22.25 +
   22.26          Formation:
   22.27              self.structure[structure name][atom identifier] = list [x,y,z]
   22.28              where:
   22.29                  structure name - string like "1jcc:A"
   22.30                  atom identifier - number of residue i PDB file, first is 0
   22.31                  x,y,x - Coordinates of CA atoms in PDB
   22.32 -        
   22.33 +
   22.34  
   22.35      self.seq
   22.36          sequences of each structures
   22.37 @@ -88,11 +88,11 @@
   22.38          Formation:
   22.39              self.seq[structure name] = string like "SSNAKIDQLSSDAQ"
   22.40                  where: structure name - string like "1jcc:A"
   22.41 -    
   22.42 +
   22.43      self.structure1 - same as self.structure, but atom identifier is number in block
   22.44 -    
   22.45 +
   22.46      self.d - distances
   22.47 -        self.d[ID][i][j] = distance between i and j atoms in structure ID  
   22.48 +        self.d[ID][i][j] = distance between i and j atoms in structure ID
   22.49  
   22.50  
   22.51  
   22.52 @@ -109,13 +109,13 @@
   22.53          self.alternative_core_new_atoms - part of new atoms in alternative core
   22.54  
   22.55          self.alternative_cores_count - max number of cores (including main core)
   22.56 -    
   22.57 +
   22.58      """
   22.59  
   22.60  
   22.61  
   22.62  
   22.63 -    
   22.64 +
   22.65      def __init__(self):
   22.66  
   22.67          """
   22.68 @@ -123,21 +123,21 @@
   22.69  
   22.70          Creates self vars self.structure and self.seq
   22.71          """
   22.72 -        
   22.73 +
   22.74          vars(self).update(vars(GeometricalCore_config))
   22.75  
   22.76  
   22.77          self.structure = {}
   22.78          self.structure1 = {}
   22.79          self.d = {}
   22.80 -        
   22.81 -        
   22.82 +
   22.83 +
   22.84          self.seq = {}
   22.85          self.pdb_text = {} # there will be PDB texts in this dict. key - string like "1jcc"
   22.86 -        
   22.87 -        
   22.88 -        
   22.89 -        
   22.90 +
   22.91 +
   22.92 +
   22.93 +
   22.94  
   22.95  
   22.96  
   22.97 @@ -148,7 +148,7 @@
   22.98          If you use at first one boundaries (in positions) of block in some PDB file,
   22.99          and then decide to change them, run this method before running check_block method
  22.100          """
  22.101 -    
  22.102 +
  22.103          self.structure1 = {}
  22.104          self.d = {}
  22.105  
  22.106 @@ -180,108 +180,108 @@
  22.107  #
  22.108  #
  22.109  #        if alternative_cores_count != None:
  22.110 -#            self.alternative_cores_count = alternative_cores_count            
  22.111 +#            self.alternative_cores_count = alternative_cores_count
  22.112  #
  22.113  #        if timeout != None:
  22.114 -#            self.timeout = timeout            
  22.115 +#            self.timeout = timeout
  22.116  #
  22.117  #
  22.118  #
  22.119  
  22.120  
  22.121  
  22.122 -    
  22.123 +
  22.124      def to_ID(self, pdb_code, pdb_chain, model):
  22.125 -    
  22.126 +
  22.127          """
  22.128          Makes standart ID for structure from pdb_code, chain and model
  22.129          """
  22.130 -    
  22.131 +
  22.132          ID = pdb_code + '_'
  22.133 -        
  22.134 +
  22.135          if pdb_chain:
  22.136              ID = ID + pdb_chain.upper().strip()
  22.137  
  22.138          if model:
  22.139              ID = ID + '_' + model
  22.140 -            
  22.141 +
  22.142          return ID
  22.143 -        
  22.144 -        
  22.145 -        
  22.146 -        
  22.147 -        
  22.148 -        
  22.149 +
  22.150 +
  22.151 +
  22.152 +
  22.153 +
  22.154 +
  22.155      def from_ID(self, ID):
  22.156 -    
  22.157 +
  22.158          """
  22.159          Parse standart ID to pdb_code, chain and model
  22.160          """
  22.161 -        
  22.162 -        
  22.163 +
  22.164 +
  22.165          if not hasattr(self, 're1'):
  22.166 -        
  22.167 -        
  22.168 +
  22.169 +
  22.170              # makes compiled regular expressions
  22.171 -            
  22.172 +
  22.173              # for pdb-codes
  22.174              self.re1 = re.compile(r"(^|[^a-z0-9])([0-9][0-9a-z]{3})([^a-z0-9]([0-9a-z ]?)([^a-z0-9]([0-9]{1,3}))?)?($|[^a-z0-9])")
  22.175 -            
  22.176 +
  22.177              # for files
  22.178              self.re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$")
  22.179 -            
  22.180 -            
  22.181 +
  22.182 +
  22.183              self.from_ID_dict = {}
  22.184 -            
  22.185 -            
  22.186 -                
  22.187 -        
  22.188 +
  22.189 +
  22.190 +
  22.191 +
  22.192          if self.from_ID_dict.has_key(ID):
  22.193              return self.from_ID_dict[ID]
  22.194 -            
  22.195 -        
  22.196 +
  22.197 +
  22.198          if ID.lower().find('.ent') != -1 or ID.lower().find('.pdb') != -1:
  22.199              # it is file
  22.200              parseO = self.re2.search(ID) # files
  22.201          else:
  22.202              parseO = self.re1.search(ID.lower()) # pdb codes
  22.203 -        
  22.204 -            
  22.205 +
  22.206 +
  22.207          if not parseO:
  22.208              return None
  22.209 -            
  22.210 +
  22.211          parse = parseO.groups()
  22.212 -        
  22.213 -        
  22.214 +
  22.215 +
  22.216          if len(parse) < 2:
  22.217              return None
  22.218 -            
  22.219 -        
  22.220 +
  22.221 +
  22.222          code = parse[1]
  22.223 -        
  22.224 +
  22.225  #        if not file:
  22.226  #            code = code.lower()
  22.227 -        
  22.228 +
  22.229          chain = ''
  22.230          model = None
  22.231 -        
  22.232 +
  22.233          if len(parse) >= 4:
  22.234              chain = parse[3]
  22.235 -            
  22.236 +
  22.237              if chain:
  22.238                  chain = chain.upper()
  22.239 -        
  22.240 +
  22.241              if len(parse) >= 6:
  22.242                  if parse[5]:
  22.243                      model = parse[5]
  22.244 -                
  22.245 -        
  22.246 -        
  22.247 +
  22.248 +
  22.249 +
  22.250          self.from_ID_dict[ID] = (code, chain, model) # save parsing results
  22.251 -        
  22.252 +
  22.253          return code, chain, model
  22.254 -        
  22.255 -        
  22.256 +
  22.257 +
  22.258  
  22.259  
  22.260  
  22.261 @@ -296,15 +296,15 @@
  22.262                  It's element - list like ["1jcc", "A", "SSNAKIDQLSSDAQ", None]
  22.263                      SSNAKIDQLSSDAQ - part of alignment block sequence
  22.264                      Last parameter - model or None
  22.265 -                    
  22.266 +
  22.267  
  22.268          Result:
  22.269              returns list of core atom identifiers (first is 0)
  22.270  
  22.271              Example: [0,1,2,3,4,6]
  22.272          """
  22.273 -        
  22.274 -        
  22.275 +
  22.276 +
  22.277  
  22.278  
  22.279          # check lendth of input sequences
  22.280 @@ -316,13 +316,13 @@
  22.281                  print 'Sequences must have equal lengths'
  22.282                  print 'Task was aborted!'
  22.283                  return
  22.284 -        
  22.285  
  22.286  
  22.287 -        
  22.288  
  22.289  
  22.290 -        
  22.291 +
  22.292 +
  22.293 +
  22.294          pdb_list = [] # list of uploading PDB names
  22.295  
  22.296          for pdb_code, pdb_chain, seq_part, model in block:
  22.297 @@ -330,12 +330,12 @@
  22.298              ID = self.to_ID(pdb_code, pdb_chain, model)
  22.299  
  22.300              if not self.seq.has_key(ID) or not self.structure.has_key(ID):
  22.301 -            
  22.302 +
  22.303  #                pdb_code = pdb_code.lower()
  22.304 -                
  22.305 +
  22.306                  if pdb_chain:
  22.307                      pdb_chain = pdb_chain.upper()
  22.308 -                    
  22.309 +
  22.310                  pdb_list.append((pdb_code, pdb_chain, model))
  22.311  
  22.312  
  22.313 @@ -348,41 +348,41 @@
  22.314          for pdb_code, pdb_chain, seq_part, model in block:
  22.315  
  22.316              ID = self.to_ID(pdb_code, pdb_chain, model)
  22.317 -            
  22.318 -            
  22.319 +
  22.320 +
  22.321  
  22.322  
  22.323  
  22.324              if not self.seq.has_key(ID) or not self.structure.has_key(ID): # no structure :(
  22.325                  print 'Undefined structuce ' + ID
  22.326                  print 'Task was aborted!'
  22.327 -                return            
  22.328 +                return
  22.329  
  22.330              if not self.seq[ID] or not self.structure[ID]: # no structure :(
  22.331                  print 'Undefined structuce ' + ID
  22.332                  print 'Task was aborted!'
  22.333                  return
  22.334 -        
  22.335  
  22.336  
  22.337  
  22.338 -        
  22.339  
  22.340 -#        absent = {} # [position identifier] = 1, if some structure has not this atom       
  22.341 +
  22.342 +
  22.343 +#        absent = {} # [position identifier] = 1, if some structure has not this atom
  22.344  
  22.345  #        structure = {} # [structure name][position in block] = list [x,y,z]
  22.346 -        
  22.347 +
  22.348          IDs = []
  22.349 -        
  22.350 +
  22.351          for pdb_code, pdb_chain, seq_part, model in block:
  22.352  
  22.353              ID = self.to_ID(pdb_code, pdb_chain, model)
  22.354 -            
  22.355 +
  22.356              IDs.append(ID)
  22.357  
  22.358              if self.structure1.has_key(ID):
  22.359                  continue
  22.360 -            
  22.361 +
  22.362  
  22.363              # alignment part of sequence with full sequence
  22.364              connections = self.find_sequence_piece(self.seq[ID], seq_part)
  22.365 @@ -393,7 +393,7 @@
  22.366              self.structure1[ID] = []
  22.367  
  22.368  
  22.369 -            
  22.370 +
  22.371              for i in xrange(0, len(connections)):
  22.372                  if connections[i] == '-': # there is not required position in PDB
  22.373                      self.structure1[ID].append(None)
  22.374 @@ -434,15 +434,15 @@
  22.375  
  22.376  #
  22.377  #        # save CA atom coordinates
  22.378 -#        self.structure1 = structure 
  22.379 +#        self.structure1 = structure
  22.380  #
  22.381 -        
  22.382 +
  22.383          return cores
  22.384  
  22.385  
  22.386  
  22.387  
  22.388 -    
  22.389 +
  22.390  
  22.391  
  22.392  
  22.393 @@ -465,7 +465,7 @@
  22.394          codes = [] # list of PDB codes
  22.395  
  22.396          # get texts of each PDB
  22.397 -        
  22.398 +
  22.399          for pdb_code, pdb_chain, model in pdb_list:
  22.400              if not codes.count(pdb_code):
  22.401                  codes.append(pdb_code)
  22.402 @@ -475,53 +475,53 @@
  22.403  
  22.404  
  22.405              # check PDB file existance in folder ./pdb/
  22.406 -            
  22.407 +
  22.408              fname = 'pdb/' + pdb_code
  22.409 -            
  22.410 +
  22.411              if pdb_code.lower().find('.ent') == -1 and pdb_code.lower().find('.pdb') == -1:
  22.412                  # pdb code
  22.413                  fname += '.ent'
  22.414 -            
  22.415 -            
  22.416 +
  22.417 +
  22.418              if os.path.exists(fname):
  22.419                  self.pdb_text[pdb_code] = open(fname).readlines()
  22.420              else:
  22.421 -                # try to upload from Internet 
  22.422 +                # try to upload from Internet
  22.423                  try:
  22.424  
  22.425                      url = self.pdb_url.replace('XXXX', pdb_code)
  22.426 -                    
  22.427 +
  22.428                      self.pdb_text[pdb_code] = urllib2.urlopen(url).readlines()
  22.429  
  22.430 -                    
  22.431 +
  22.432                  except:
  22.433                      self.pdb_text[pdb_code] = ''
  22.434 -                    
  22.435 -                    
  22.436 +
  22.437 +
  22.438                  if self.save:
  22.439                      # save information
  22.440 -                    
  22.441 +
  22.442                      try:
  22.443 -                    
  22.444 -                    
  22.445 +
  22.446 +
  22.447                          # check folder "/pdb" existance
  22.448 -                        
  22.449 +
  22.450                          if not os.path.exists('pdb'):
  22.451 -                        
  22.452 +
  22.453                              # make folder
  22.454                              os.mkdir('pdb');
  22.455 -                        
  22.456 -                        
  22.457 +
  22.458 +
  22.459                          pdbfile_to_save = open('pdb/' + pdb_code + '.ent', 'w')
  22.460                          pdbfile_to_save.write(''.join(self.pdb_text[pdb_code]))
  22.461                          pdbfile_to_save.close()
  22.462 -                        
  22.463 +
  22.464                          print 'PDB structure saved to file pdb/' + pdb_code + '.ent'
  22.465 -					
  22.466 +
  22.467                      except:
  22.468 -                        
  22.469 +
  22.470                          print 'Can not write file pdb/' + pdb_code + '.ent'
  22.471 -                    
  22.472 +
  22.473  
  22.474              if not self.pdb_text[pdb_code]:
  22.475                  #print self.pdb_url.replace('XXXX', pdb_code)
  22.476 @@ -532,43 +532,43 @@
  22.477          # We have all PDB texts
  22.478  
  22.479  
  22.480 -        # read all texts and get CA atoms coordinates 
  22.481 +        # read all texts and get CA atoms coordinates
  22.482  
  22.483          for pdb_code, pdb_chain, model in pdb_list:
  22.484 -        
  22.485 +
  22.486              ID = self.to_ID(pdb_code, pdb_chain, model)
  22.487 -            
  22.488 +
  22.489              S = self.read_pdb(pdb_code, pdb_chain, self.pdb_text[pdb_code], model)
  22.490              self.structure[ID], self.seq[ID] = S
  22.491  
  22.492  
  22.493              if not self.seq[ID]:
  22.494                  print 'Can not find sequence for "' + ID + '"'
  22.495 -                return 
  22.496 -         
  22.497 +                return
  22.498  
  22.499  
  22.500  
  22.501 -        
  22.502 +
  22.503 +
  22.504  
  22.505      def read_pdb(self, pdb_code, pdb_chain, pdb_text, model):
  22.506          """
  22.507          Reads pdb_text
  22.508 -        
  22.509 +
  22.510          Returns (structure, sequence)
  22.511              structure (key - string like "1jcc:A")
  22.512                  This element is dict with [x,y,z] coordinates of each aa
  22.513              sequence
  22.514          """
  22.515  
  22.516 -       
  22.517 +
  22.518  
  22.519          structure = [] # dict with coordinates list
  22.520          seq = ''  # sequense (string)
  22.521 -        
  22.522 -        
  22.523 +
  22.524 +
  22.525          Model_already = 0 # is it our model now
  22.526 -        
  22.527 +
  22.528          if not model:
  22.529              Model_already = 1
  22.530  
  22.531 @@ -577,52 +577,52 @@
  22.532  
  22.533  
  22.534          for currentline in pdb_text:
  22.535 -        
  22.536 -        
  22.537 +
  22.538 +
  22.539              if not Model_already:
  22.540                  if currentline[0:5] == 'MODEL':
  22.541                      if currentline[10:14].strip() == model:
  22.542                          Model_already = 1
  22.543                          continue
  22.544 -                        
  22.545 +
  22.546                  continue
  22.547 -                
  22.548 -                
  22.549 -            
  22.550 +
  22.551 +
  22.552 +
  22.553              if currentline[0:6] == 'ENDMDL':
  22.554                  break
  22.555  
  22.556  
  22.557              if len(currentline) < 54:
  22.558                  continue
  22.559 -            
  22.560 -            
  22.561 +
  22.562 +
  22.563  
  22.564              if currentline[0:4] != "ATOM":
  22.565                  continue
  22.566 -                
  22.567 -            
  22.568 +
  22.569 +
  22.570              atomType = currentline[12:16].strip()
  22.571  
  22.572              if atomType != "CA":
  22.573                  continue
  22.574 -                
  22.575 -            
  22.576 +
  22.577 +
  22.578              thisChain = currentline[21].strip()
  22.579  
  22.580              if thisChain != pdb_chain:
  22.581                  continue
  22.582 -                
  22.583 -            
  22.584 +
  22.585 +
  22.586              alterCode = currentline[16] # Alter code
  22.587 -                
  22.588 +
  22.589              if alterCode != ' ' and alterCode != 'A':
  22.590                  continue
  22.591  
  22.592 -            
  22.593 +
  22.594              thisAminoAcid = currentline[17:20].strip()
  22.595  
  22.596 -                      
  22.597 +
  22.598              thisX = float(currentline[30:38].strip())
  22.599              thisY = float(currentline[38:46].strip())
  22.600              thisZ = float(currentline[46:54].strip())
  22.601 @@ -637,7 +637,7 @@
  22.602                  seq = seq + AAdict[thisAminoAcid][0]
  22.603              else:
  22.604                  seq = seq + 'x'
  22.605 -        
  22.606 +
  22.607          return (structure, seq)
  22.608  
  22.609  
  22.610 @@ -645,35 +645,34 @@
  22.611  
  22.612  
  22.613  
  22.614 -###     
  22.615 -###     
  22.616 -###     
  22.617 +###
  22.618 +###
  22.619 +###
  22.620  ###         def add_pdb(self, pdb_code, pdb_chain, pdb_text, model):
  22.621 -###     
  22.622 +###
  22.623  ###             """
  22.624  ###             Method read PDB and get coordinates of CA atoms
  22.625 -###     
  22.626 +###
  22.627  ###             Arguments:
  22.628  ###                 pdb_code - PDB name, for example "1jcc"
  22.629  ###                 pdb_chain - chain, for example "A"
  22.630  ###                 pdb_text - PDB file strings
  22.631 -###     
  22.632 -###     
  22.633 +###
  22.634 +###
  22.635  ###             Result:
  22.636  ###                 New element will be added in dict self.structure (key - string like "1jcc:A")
  22.637  ###                 This element is dict with [x,y,z] coordinates of each aa
  22.638 -###     
  22.639 +###
  22.640  ###             """
  22.641 -###             
  22.642 +###
  22.643  ###             ID = self.to_ID(pdb_code, pdb_chain, model)
  22.644 -###             
  22.645 +###
  22.646  ###             self.structure[ID], self.seq[ID] = read_pdb(pdb_code, pdb_chain, pdb_text, model)
  22.647 -###             
  22.648 -###     
  22.649 -###             
  22.650 +###
  22.651 +###
  22.652 +###
  22.653  
  22.654  
  22.655 -        
  22.656  
  22.657  
  22.658  
  22.659 @@ -689,8 +688,9 @@
  22.660  
  22.661  
  22.662  
  22.663 -        
  22.664 -            
  22.665 +
  22.666 +
  22.667 +
  22.668  
  22.669  
  22.670  
  22.671 @@ -730,10 +730,10 @@
  22.672  
  22.673  
  22.674          # At first, calculate conformity without gapes
  22.675 -        
  22.676 +
  22.677          connections0 = [] # key - seq_part position, value - position in seq_part without gapes
  22.678          part_usage = 0 # how many aa from seqpart were used
  22.679 -        
  22.680 +
  22.681          for i in xrange(0, len(seq_part)):
  22.682              s = seq_part[i]
  22.683              if s == '-':
  22.684 @@ -741,15 +741,15 @@
  22.685              else:
  22.686                  connections0.append(part_usage)
  22.687                  part_usage += 1
  22.688 -            
  22.689 -        
  22.690  
  22.691  
  22.692  
  22.693  
  22.694  
  22.695  
  22.696 -        
  22.697 +
  22.698 +
  22.699 +
  22.700  
  22.701          connections1 = []
  22.702  
  22.703 @@ -769,7 +769,7 @@
  22.704                  connections1.append('-')
  22.705  #                print 'Can not find', AA_part, len(connections1), 'of', seq_part, 'in', seq_all
  22.706                  continue
  22.707 -        
  22.708 +
  22.709  
  22.710              connections1.append(all_usage - 1)
  22.711  
  22.712 @@ -788,10 +788,10 @@
  22.713  
  22.714  
  22.715  
  22.716 -        
  22.717 -            
  22.718 -            
  22.719 -            
  22.720 +
  22.721 +
  22.722 +
  22.723 +
  22.724  
  22.725  
  22.726  
  22.727 @@ -805,51 +805,51 @@
  22.728  
  22.729          """
  22.730          Calculates distances
  22.731 -        
  22.732 +
  22.733          adds new elements to self.d
  22.734          """
  22.735  
  22.736  
  22.737          # Atom count
  22.738          aton_count = len(self.structure1.values()[0])
  22.739 -        
  22.740 +
  22.741          for ID, structure in self.structure1.items():
  22.742 -            
  22.743 +
  22.744              if self.d.has_key(ID):
  22.745                  continue # already done
  22.746 -                
  22.747 +
  22.748              self.d[ID] = []
  22.749  
  22.750  
  22.751 -            # create distance matrix 
  22.752 +            # create distance matrix
  22.753              for atom1 in xrange(0, aton_count):
  22.754                  self.d[ID].append([])
  22.755                  for atom2 in xrange(0, aton_count):
  22.756                      self.d[ID][atom1].append(None)
  22.757  
  22.758  
  22.759 -            # done distance matrix 
  22.760 -            
  22.761 +            # done distance matrix
  22.762 +
  22.763              for atom1 in xrange(0, aton_count):
  22.764                  for atom2 in xrange(0, aton_count):
  22.765  
  22.766                      if atom2 == atom1:
  22.767                          break
  22.768 -                        
  22.769 +
  22.770                      if structure[atom1] and structure[atom2]:
  22.771  
  22.772                          dist = 0
  22.773  
  22.774                          for xyz in xrange(0, 3):
  22.775 -                           
  22.776 +
  22.777                              dist += (structure[atom1][xyz] - structure[atom2][xyz]) ** 2
  22.778  
  22.779                          dist = dist ** 0.5
  22.780  
  22.781                          self.d[ID][atom1][atom2] = dist
  22.782                          self.d[ID][atom2][atom1] = dist
  22.783 -         
  22.784 -            
  22.785 +
  22.786 +
  22.787  
  22.788  
  22.789  
  22.790 @@ -865,12 +865,12 @@
  22.791  
  22.792          Arguments:
  22.793              IDs - list of IDs to study
  22.794 -            
  22.795 -            
  22.796 +
  22.797 +
  22.798          Result:
  22.799          returns [alignment core graph, cost graf]
  22.800  
  22.801 -        
  22.802 +
  22.803              alignment core graph example:
  22.804              graf[0][1] = 1    0 and 1 positions are connected
  22.805              graf[0][1] = 0    0 and 1 positions are not connected
  22.806 @@ -882,19 +882,19 @@
  22.807  
  22.808          graf = {}
  22.809  
  22.810 -        cost = {} # distance spreading 
  22.811 +        cost = {} # distance spreading
  22.812  
  22.813 -        
  22.814 -        
  22.815 -        
  22.816 -        
  22.817 +
  22.818 +
  22.819 +
  22.820 +
  22.821          for atom1 in xrange(0, aton_count):
  22.822  
  22.823 -            
  22.824 +
  22.825              graf[atom1] = {}
  22.826  
  22.827              cost[atom1] = {}
  22.828 -            
  22.829 +
  22.830              for atom2 in xrange(0, aton_count):
  22.831  
  22.832  
  22.833 @@ -902,26 +902,26 @@
  22.834                      graf[atom1][atom2] = 1
  22.835                      break
  22.836  
  22.837 -                
  22.838 +
  22.839                  distances = []
  22.840 -                
  22.841 +
  22.842                  for ID in IDs:
  22.843                      distances.append(self.d[ID][atom1][atom2])
  22.844  
  22.845 -                
  22.846 +
  22.847                  if distances.count(None):
  22.848                      graf[atom1][atom2] = 0
  22.849                      graf[atom2][atom1] = 0
  22.850  
  22.851                  else:
  22.852 -                    
  22.853 +
  22.854                      spreading = max(distances) - min(distances)
  22.855  
  22.856                      if spreading > self.delta:
  22.857 -                    
  22.858 +
  22.859                          graf[atom1][atom2] = 0
  22.860                          graf[atom2][atom1] = 0
  22.861 -                        
  22.862 +
  22.863                      else:
  22.864                          graf[atom1][atom2] = 1
  22.865                          graf[atom2][atom1] = 1
  22.866 @@ -929,7 +929,7 @@
  22.867                          cost[atom1][atom2] = -spreading
  22.868                          cost[atom2][atom1] = -spreading
  22.869  
  22.870 -                        
  22.871 +
  22.872  
  22.873          return [graf, cost]
  22.874  
  22.875 @@ -966,7 +966,7 @@
  22.876              if self.alternative_core_new_atoms:
  22.877  
  22.878                  # compare this core with all added cores
  22.879 -            
  22.880 +
  22.881                  for one_core in new_cores:
  22.882                      new_atoms = 0
  22.883  
  22.884 @@ -980,15 +980,12 @@
  22.885  
  22.886  
  22.887              # if this core is good
  22.888 -            if ok == 1:            
  22.889 +            if ok == 1:
  22.890                  new_cores.append(core)
  22.891  
  22.892  
  22.893  
  22.894          return new_cores
  22.895 -                
  22.896 -            
  22.897 -            
  22.898  
  22.899  
  22.900  
  22.901 @@ -1006,3 +1003,6 @@
  22.902  
  22.903  
  22.904  
  22.905 +
  22.906 +
  22.907 +
    23.1 --- a/blocks3d/Kliki.py	Mon Jan 24 21:40:10 2011 +0300
    23.2 +++ b/blocks3d/Kliki.py	Tue Jan 25 16:03:00 2011 +0300
    23.3 @@ -34,14 +34,14 @@
    23.4  
    23.5      compsub - ?????? ??????? ??? ??????? ????
    23.6      """
    23.7 -            
    23.8  
    23.9 -        
   23.10  
   23.11  
   23.12  
   23.13  
   23.14 -    
   23.15 +
   23.16 +
   23.17 +
   23.18      def __init__ (self, graf, cost = None, limit_count=0, min_size=0, timeout=10):
   23.19  
   23.20          """
   23.21 @@ -57,7 +57,7 @@
   23.22              ?????? ???? ?????? ????? ??????????.
   23.23              ??? ?????? ????, ??? ???? ??????? ??? ?????
   23.24              ???????????? ??? fast_algorithm ? ??? ?????????? ????
   23.25 -            
   23.26 +
   23.27  
   23.28          limit_count - ???????????? ????? ????, ??????? ?????
   23.29              ???? ?????? 0, ?? ??????????? ??? ?????
   23.30 @@ -65,15 +65,15 @@
   23.31          min_size - min size of returning klika
   23.32  
   23.33          timeout - time in sec. for BRON-KERBOSH algorithm
   23.34 -            
   23.35 +
   23.36          """
   23.37  
   23.38 -        
   23.39 +
   23.40  
   23.41          self.graf = graf
   23.42          self.cost = cost
   23.43  
   23.44 -        
   23.45 +
   23.46          self.kliki = []
   23.47  
   23.48          self.timeout = timeout
   23.49 @@ -95,40 +95,40 @@
   23.50          while deleted:
   23.51  
   23.52              deleted = 0
   23.53 -            
   23.54 +
   23.55              for atom, c in connections.items():
   23.56 -                
   23.57 +
   23.58                  if c < min_size:
   23.59 -                    
   23.60 +
   23.61                      del connections[atom]
   23.62 -                    
   23.63 +
   23.64                      for atom1, connect in graf[atom].items():
   23.65                          if connect == 1 and connections.has_key(atom1):
   23.66                              connections[atom1] -= 1
   23.67                              deleted = 1
   23.68  
   23.69 -                
   23.70 -                        
   23.71 -        
   23.72 +
   23.73 +
   23.74 +
   23.75  
   23.76          bank_l = {}
   23.77 -        
   23.78 +
   23.79          for atom, c in connections.items():
   23.80 -            
   23.81 +
   23.82              if not bank_l.has_key(c):
   23.83                  bank_l[c] = []
   23.84 -                
   23.85 +
   23.86              bank_l[c].append(atom)
   23.87  
   23.88  
   23.89 -        keys = []            
   23.90 -        
   23.91 +        keys = []
   23.92 +
   23.93          if len(bank_l.keys()):
   23.94              for c in xrange(min(bank_l.keys()), max(bank_l.keys())+1):
   23.95                  if bank_l.has_key(c):
   23.96                      keys.extend(bank_l[c])
   23.97 -            
   23.98 -            
   23.99 +
  23.100 +
  23.101  
  23.102  
  23.103          # RUN BRON-KERBOSH
  23.104 @@ -141,7 +141,7 @@
  23.105              self.fast_algorithm(keys[:]) # run fast algorithm
  23.106  
  23.107  
  23.108 -        
  23.109 +
  23.110  
  23.111  
  23.112          # ?????????? ?????????? ????? ?? ???????? ????? ????????? ? ???
  23.113 @@ -154,7 +154,7 @@
  23.114  ##        min_l=len(self.kliki[0]) # ??????????? ??????
  23.115  
  23.116          bank_l = {}
  23.117 -        
  23.118 +
  23.119          for klika in self.kliki:
  23.120              klika.sort()
  23.121              l = len(klika) # ????? ??????? ?????
  23.122 @@ -173,12 +173,12 @@
  23.123          kliki=[]
  23.124  
  23.125          #print self.cost
  23.126 -        
  23.127 +
  23.128          if len(bank_l.keys()):
  23.129 -            
  23.130 +
  23.131              r = range(min(bank_l.keys()), max(bank_l.keys())+1)
  23.132              r.reverse()
  23.133 -            
  23.134 +
  23.135              for l in r:
  23.136                  if (bank_l.has_key(l)):
  23.137  
  23.138 @@ -191,7 +191,7 @@
  23.139  
  23.140                          # ??????????? ?? ?? ???????? ????? ???? cost
  23.141  
  23.142 -                       
  23.143 +
  23.144  
  23.145                          costs = []
  23.146  
  23.147 @@ -200,7 +200,7 @@
  23.148                              c = 0
  23.149  
  23.150                              for i in klika:
  23.151 -                                
  23.152 +
  23.153                                  if not self.cost.has_key(i):
  23.154                                      continue
  23.155  
  23.156 @@ -209,7 +209,7 @@
  23.157                                      if j == i:
  23.158                                          break
  23.159  
  23.160 -                                    
  23.161 +
  23.162                                      if not self.cost[i].has_key(j):
  23.163                                          continue
  23.164  
  23.165 @@ -233,15 +233,15 @@
  23.166                              del costs[n]
  23.167  
  23.168                          k = k1
  23.169 -                    
  23.170 +
  23.171                      kliki.extend(k)
  23.172 -                
  23.173 +
  23.174  ##        kliki.reverse()
  23.175  
  23.176          if limit_count:
  23.177              if len(kliki) > limit_count: # ??????? ??????????? ?? ????? ????
  23.178                  kliki = kliki[:limit_count]
  23.179 -            
  23.180 +
  23.181  
  23.182          self.kliki = kliki[:]
  23.183  
  23.184 @@ -249,10 +249,10 @@
  23.185  
  23.186  
  23.187  
  23.188 -        
  23.189 -            
  23.190  
  23.191 -    
  23.192 +
  23.193 +
  23.194 +
  23.195  
  23.196      def bron_kerbosh (self, keys):
  23.197  
  23.198 @@ -273,9 +273,9 @@
  23.199  
  23.200  
  23.201          print 'Bron and Kerbosh algorithm started'
  23.202 -        
  23.203 +
  23.204          start_time = time.time()
  23.205 -    
  23.206 +
  23.207          # ????...
  23.208          while 1:
  23.209  
  23.210 @@ -284,14 +284,14 @@
  23.211  
  23.212  
  23.213  
  23.214 -            
  23.215 +
  23.216  
  23.217              # ???????? candidates ? used ?? ??????
  23.218  
  23.219              #print depth
  23.220 -            
  23.221 +
  23.222              candidates = list_candidates[depth][:]
  23.223 -            used = list_used[depth][:]          
  23.224 +            used = list_used[depth][:]
  23.225  
  23.226  
  23.227  
  23.228 @@ -301,25 +301,25 @@
  23.229                  depth -= 1
  23.230  
  23.231                  if compsub:
  23.232 -                    compsub.pop()                
  23.233 +                    compsub.pop()
  23.234                  continue
  23.235  
  23.236 -                       
  23.237  
  23.238  
  23.239 -            
  23.240 -            
  23.241 +
  23.242 +
  23.243 +
  23.244              # ? used ?? ???????? ???????, ??????????? ?? ????? ????????? ?? candidates
  23.245              # (??? ?? used ?? ????????? ???? ?? ? 1 ?? candidates)
  23.246  
  23.247              used_candidates = 0
  23.248 -        
  23.249 +
  23.250              for used1 in used:
  23.251                  for candidates1 in candidates:
  23.252                      if self.graf[used1][candidates1] == 0:
  23.253                          break
  23.254                  else:
  23.255 -                    used_candidates = 1             
  23.256 +                    used_candidates = 1
  23.257  
  23.258              if used_candidates:
  23.259                  depth -= 1
  23.260 @@ -327,8 +327,8 @@
  23.261                  if compsub:
  23.262                      compsub.pop()
  23.263                  continue
  23.264 -                
  23.265 -       
  23.266 +
  23.267 +
  23.268  
  23.269  
  23.270  
  23.271 @@ -341,7 +341,7 @@
  23.272  
  23.273  
  23.274  
  23.275 -            
  23.276 +
  23.277              # ????????? new_candidates ? new_used, ?????? ?? candidates ? used ???????, ?? ?????????? ? v
  23.278              # (?? ????, ???????? ?????? ?????????? ? v)
  23.279              new_candidates = []
  23.280 @@ -349,7 +349,7 @@
  23.281                  if self.graf[candidates1][v] == 1 and candidates1 != v:
  23.282                      new_candidates.append(candidates1)
  23.283  
  23.284 -                                                            
  23.285 +
  23.286              new_used = []
  23.287              for used1 in used:
  23.288                  if self.graf[used1][v] == 1 and used1 != v:
  23.289 @@ -358,7 +358,7 @@
  23.290  
  23.291  
  23.292  
  23.293 -            # ??????? v ?? candidates ? ???????? ? used                   
  23.294 +            # ??????? v ?? candidates ? ???????? ? used
  23.295              del list_candidates[depth][0]
  23.296              list_used[depth].append(v)
  23.297  
  23.298 @@ -367,33 +367,33 @@
  23.299              if len(new_candidates) == 0 and len(new_used) == 0:
  23.300                  # compsub ? ?????
  23.301                  self.kliki.append(compsub[:])
  23.302 -                
  23.303 +
  23.304              else:
  23.305                  # ????? ?????????? ???????? bron_kerbosh(new_candidates, new_used)
  23.306  
  23.307                  depth += 1
  23.308 -                
  23.309 +
  23.310  
  23.311                  # TIMEOUT check start
  23.312                  if self.timeout != -1:
  23.313 -                    
  23.314 +
  23.315                      if time.time() - start_time > self.timeout:
  23.316  
  23.317                          self.kliki = []
  23.318                          return
  23.319                  # TIMEOUT check end
  23.320  
  23.321 -                    
  23.322 -                    
  23.323 -                
  23.324 +
  23.325 +
  23.326 +
  23.327                  if depth >= len(list_candidates):
  23.328                      list_candidates.append([])
  23.329                      list_used.append([])
  23.330  
  23.331 -                
  23.332 +
  23.333                  list_candidates[depth] = new_candidates[:]
  23.334                  list_used[depth] = new_used[:]
  23.335 -                
  23.336 +
  23.337                  continue
  23.338  
  23.339  
  23.340 @@ -442,7 +442,7 @@
  23.341  
  23.342                  if not excluded.has_key(i):
  23.343                      keys1.append(i)
  23.344 -                
  23.345 +
  23.346              if len(keys1) == 0:
  23.347                  break
  23.348  
  23.349 @@ -454,9 +454,9 @@
  23.350                  connections = {} # index - atom, value -  connections value
  23.351  
  23.352                  for i in keys1:
  23.353 -                    
  23.354 +
  23.355                      connections[i] = 0
  23.356 -                    
  23.357 +
  23.358                      for j in keys1:
  23.359  
  23.360                          if i != j and self.graf[i][j]:
  23.361 @@ -466,8 +466,8 @@
  23.362                  if max(connections.values()) == min(connections.values()):
  23.363                      # all atoms are equal
  23.364                      break
  23.365 -                
  23.366 -                                                    
  23.367 +
  23.368 +
  23.369                  exclude_connect = min(connections.values()) # excluded atoms connections
  23.370  
  23.371  
  23.372 @@ -479,7 +479,7 @@
  23.373                      for i in keys1:
  23.374  
  23.375                          cost_sum[i] = 0
  23.376 -                        
  23.377 +
  23.378                          if connections[i] == exclude_connect:
  23.379  
  23.380                              for j in keys1:
  23.381 @@ -495,13 +495,13 @@
  23.382                      keys2 = []
  23.383  
  23.384                      for i in keys1:
  23.385 -                        
  23.386 +
  23.387                          if connections[i] == exclude_connect:
  23.388  
  23.389                              if cost_sum[i] == exclude_cost:
  23.390  
  23.391                                  continue
  23.392 -                        
  23.393 +
  23.394                          keys2.append(i)
  23.395  
  23.396                      keys1 = clon(keys2)
  23.397 @@ -511,11 +511,11 @@
  23.398                      keys2 = []
  23.399  
  23.400                      for i in keys1:
  23.401 -                        
  23.402 +
  23.403                          if connections[i] == exclude_connect:
  23.404  
  23.405                              continue
  23.406 -                        
  23.407 +
  23.408                          keys2.append(i)
  23.409  
  23.410                      keys1 = clon(keys2)
  23.411 @@ -546,22 +546,22 @@
  23.412  
  23.413  
  23.414                          break
  23.415 -                        
  23.416 -                    
  23.417 +
  23.418 +
  23.419                  else:
  23.420                      # no new atoms
  23.421                      break
  23.422 -                    
  23.423 -            
  23.424 +
  23.425 +
  23.426  
  23.427              # keys1 is klika
  23.428  
  23.429              self.kliki.append(keys1[:])
  23.430  
  23.431 -            
  23.432  
  23.433 -            
  23.434 -                
  23.435 -                
  23.436  
  23.437 -        
  23.438 +
  23.439 +
  23.440 +
  23.441 +
  23.442 +
    24.1 --- a/blocks3d/clon.py	Mon Jan 24 21:40:10 2011 +0300
    24.2 +++ b/blocks3d/clon.py	Tue Jan 25 16:03:00 2011 +0300
    24.3 @@ -10,9 +10,9 @@
    24.4  
    24.5  
    24.6      if t == list or t == tuple:
    24.7 -        
    24.8 +
    24.9          r = []
   24.10 -        
   24.11 +
   24.12          for i in obj:
   24.13              r.append(clon(i))
   24.14  
   24.15 @@ -23,9 +23,9 @@
   24.16  
   24.17  
   24.18      if t == dict:
   24.19 -        
   24.20 +
   24.21          r = {}
   24.22 -        
   24.23 +
   24.24          for key, value in obj.items():
   24.25              r[key] = clon(value)
   24.26  
   24.27 @@ -34,5 +34,5 @@
   24.28  
   24.29      return obj
   24.30  
   24.31 -            
   24.32 -    
   24.33 +
   24.34 +
    25.1 --- a/blocks3d/wt/blocks3d-wt.pro	Mon Jan 24 21:40:10 2011 +0300
    25.2 +++ b/blocks3d/wt/blocks3d-wt.pro	Tue Jan 25 16:03:00 2011 +0300
    25.3 @@ -1,6 +1,5 @@
    25.4  TARGET = blocks3d-wt.exe
    25.5  
    25.6 -SOURCES += config.C
    25.7  SOURCES += mktemp_string.C
    25.8  SOURCES += blocks3d-wt.C
    25.9  SOURCES += blocks3d-wt-widget.C
   25.10 @@ -21,4 +20,4 @@
   25.11  }
   25.12  
   25.13  INCLUDEPATH = /usr/local/include/Wt/
   25.14 -LIBS += -lwthttp -lwt
   25.15 +LIBS += -lwthttp -lwt -lboost_signals
    26.1 --- a/blocks3d/wt/compile.sh	Mon Jan 24 21:40:10 2011 +0300
    26.2 +++ b/blocks3d/wt/compile.sh	Tue Jan 25 16:03:00 2011 +0300
    26.3 @@ -1,4 +1,5 @@
    26.4 -cd build;
    26.5 +mkdir -p build
    26.6 +cd build
    26.7  qmake ../blocks3d-wt.pro && make VERBOSE=0
    26.8  #~ cmake .. && make VERBOSE=1
    26.9  
    27.1 --- a/blocks3d/wt/config.C	Mon Jan 24 21:40:10 2011 +0300
    27.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.3 @@ -1,36 +0,0 @@
    27.4 -
    27.5 -#include "config.h"
    27.6 -#include <boost/format.hpp>
    27.7 -#include <string>
    27.8 -
    27.9 -
   27.10 -namespace Malakite
   27.11 -{
   27.12 -
   27.13 -const char* python_path = "python";
   27.14 -const char* blocks3d_path = "/home/boris/my-code/Blocks3D/Blocks3D_v5/Blocks3D.py";
   27.15 -const char* temp_path = "/tmp";
   27.16 -const char* temp_mkstemp_path = "/tmp/XXXXXX";
   27.17 -
   27.18 -const char* main_url = "/";
   27.19 -
   27.20 -//~ const char* tempfile_alignment = "in_aln.fasta";
   27.21 -//~ const char* tempfile_conformity = "in_conf.fasta";
   27.22 -//~ const char* tempfile_html = "out.html";
   27.23 -//~ const char* tempfile_check = "check.lock";
   27.24 -const char* tempfile_sh = "run.sh";
   27.25 -
   27.26 -namespace defaults
   27.27 -{
   27.28 -
   27.29 -const char* delta = "2.0";
   27.30 -const char* min_block_width = "3";
   27.31 -const char* timeout = "10";
   27.32 -const char* timeout2 = "10";
   27.33 -
   27.34 -}
   27.35 -
   27.36 -
   27.37 -
   27.38 -}
   27.39 -
    28.1 --- a/blocks3d/wt/config.h	Mon Jan 24 21:40:10 2011 +0300
    28.2 +++ b/blocks3d/wt/config.h	Tue Jan 25 16:03:00 2011 +0300
    28.3 @@ -3,34 +3,34 @@
    28.4  
    28.5  namespace Malakite
    28.6  {
    28.7 -
    28.8 +    
    28.9  const int refresh_period = 5*1000;
   28.10 -
   28.11  const unsigned int max_simultaneous_tasks = 100;
   28.12  
   28.13 -extern const char* python_path;
   28.14 -extern const char* blocks3d_path;
   28.15 -extern const char* temp_path;
   28.16 -extern const char* temp_mkstemp_path;
   28.17 +const char* const python_path = "python";
   28.18 +const char* const blocks3d_path = "/home/boris/my-code/Blocks3D/Blocks3D_v5/Blocks3D.py";
   28.19 +const char* const temp_path = "/tmp";
   28.20 +const char* const temp_mkstemp_path = "/tmp/XXXXXX";
   28.21  
   28.22 -extern const char* main_url;
   28.23 +const char* const main_url = "/";
   28.24  
   28.25 -//~ extern const char* tempfile_alignment;
   28.26 -//~ extern const char* tempfile_conformity;
   28.27 -//~ extern const char* tempfile_html;
   28.28 -//~ extern const char* tempfile_check;
   28.29 -extern const char* tempfile_sh;
   28.30 +const char* const tempfile_sh = "run.sh";
   28.31  
   28.32  namespace defaults
   28.33  {
   28.34  
   28.35 -extern const char* delta;
   28.36 -extern const char* min_block_width;
   28.37 -extern const char* timeout;
   28.38 -extern const char* timeout2;
   28.39 +const char* const delta = "2.0";
   28.40 +const char* const min_block_width = "3";
   28.41 +const char* const timeout = "10";
   28.42 +const char* const timeout2 = "10";
   28.43  
   28.44  }
   28.45  
   28.46 +
   28.47 +
   28.48  }
   28.49  
   28.50 +
   28.51 +
   28.52 +
   28.53  #endif // MALAKITE_BLOCKS3D_CONFIG_H_s
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/debian/changelog	Tue Jan 25 16:03:00 2011 +0300
    29.3 @@ -0,0 +1,15 @@
    29.4 + -- Danya Alexeyevsky <dendik@kodomo.fbb.msu.ru>  Mon, 22 Nov 2010 14:03:22 +0300
    29.5 +
    29.6 +allpy (0.0-3) unstable; urgency=medium
    29.7 +
    29.8 +  * Fixed syntax errors in source to soothe postinst scripts.
    29.9 +
   29.10 + -- Danya Alexeyevsky <dendik@kodomo.fbb.msu.ru>  Mon, 22 Nov 2010 13:43:59 +0300
   29.11 +
   29.12 +allpy (0.0-2) unstable; urgency=medium
   29.13 +
   29.14 +  * Initial debianization.
   29.15 +
   29.16 + -- Danya Alexeyevsky <dendik@kodomo.fbb.msu.ru>  Mon, 15 Nov 2010 17:40:37 +0300
   29.17 +
   29.18 +vim: set ft=debchangelog et ai:
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/debian/compat	Tue Jan 25 16:03:00 2011 +0300
    30.3 @@ -0,0 +1,1 @@
    30.4 +7
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/debian/control.in	Tue Jan 25 16:03:00 2011 +0300
    31.3 @@ -0,0 +1,16 @@
    31.4 +Source: allpy
    31.5 +Section: science
    31.6 +Priority: optional
    31.7 +Maintainer: Danya Alexeyevsky <dendik@kodomo.fbb.msu.ru>
    31.8 +Standards-Version: 3.6.1
    31.9 +Build-Depends: @cdbs@
   31.10 +
   31.11 +Package: python-allpy
   31.12 +Architecture: all
   31.13 +Depends: python (>> 2.5)
   31.14 +Description: Python library for working with alignments
   31.15 +
   31.16 +Package: geometrical-core
   31.17 +Architecture: all
   31.18 +Depends: python-allpy
   31.19 +Description: Find geometrical core in a multiple alignment
    32.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.2 +++ b/debian/geometrical-core.install	Tue Jan 25 16:03:00 2011 +0300
    32.3 @@ -0,0 +1,1 @@
    32.4 +geometrical_core/geometrical-core usr/bin
    33.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.2 +++ b/debian/rules	Tue Jan 25 16:03:00 2011 +0300
    33.3 @@ -0,0 +1,8 @@
    33.4 +#!/usr/bin/make -f
    33.5 +DEB_AUTO_UPDATE_DEBIAN_CONTROL = yes
    33.6 +DEB_PYTHON_SYSTEM = pysupport
    33.7 +include /usr/share/cdbs/1/rules/debhelper.mk
    33.8 +include /usr/share/cdbs/1/class/python-distutils.mk
    33.9 +
   33.10 +DEB_COMPRESS_EXCLUDE := .py
   33.11 +DEB_DESTDIR = debian/python-allpy
    34.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.2 +++ b/docs/Makefile	Tue Jan 25 16:03:00 2011 +0300
    34.3 @@ -0,0 +1,89 @@
    34.4 +# Makefile for Sphinx documentation
    34.5 +#
    34.6 +
    34.7 +# You can set these variables from the command line.
    34.8 +SPHINXOPTS    =
    34.9 +SPHINXBUILD   = sphinx-build
   34.10 +PAPER         =
   34.11 +BUILDDIR      = build
   34.12 +
   34.13 +# Internal variables.
   34.14 +PAPEROPT_a4     = -D latex_paper_size=a4
   34.15 +PAPEROPT_letter = -D latex_paper_size=letter
   34.16 +ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
   34.17 +
   34.18 +.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
   34.19 +
   34.20 +help:
   34.21 +	@echo "Please use \`make <target>' where <target> is one of"
   34.22 +	@echo "  html      to make standalone HTML files"
   34.23 +	@echo "  dirhtml   to make HTML files named index.html in directories"
   34.24 +	@echo "  pickle    to make pickle files"
   34.25 +	@echo "  json      to make JSON files"
   34.26 +	@echo "  htmlhelp  to make HTML files and a HTML help project"
   34.27 +	@echo "  qthelp    to make HTML files and a qthelp project"
   34.28 +	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
   34.29 +	@echo "  changes   to make an overview of all changed/added/deprecated items"
   34.30 +	@echo "  linkcheck to check all external links for integrity"
   34.31 +	@echo "  doctest   to run all doctests embedded in the documentation (if enabled)"
   34.32 +
   34.33 +clean:
   34.34 +	-rm -rf $(BUILDDIR)/*
   34.35 +
   34.36 +html:
   34.37 +	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
   34.38 +	@echo
   34.39 +	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
   34.40 +
   34.41 +dirhtml:
   34.42 +	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
   34.43 +	@echo
   34.44 +	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
   34.45 +
   34.46 +pickle:
   34.47 +	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
   34.48 +	@echo
   34.49 +	@echo "Build finished; now you can process the pickle files."
   34.50 +
   34.51 +json:
   34.52 +	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
   34.53 +	@echo
   34.54 +	@echo "Build finished; now you can process the JSON files."
   34.55 +
   34.56 +htmlhelp:
   34.57 +	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
   34.58 +	@echo
   34.59 +	@echo "Build finished; now you can run HTML Help Workshop with the" \
   34.60 +	      ".hhp project file in $(BUILDDIR)/htmlhelp."
   34.61 +
   34.62 +qthelp:
   34.63 +	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
   34.64 +	@echo
   34.65 +	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
   34.66 +	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
   34.67 +	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/allpy.qhcp"
   34.68 +	@echo "To view the help file:"
   34.69 +	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/allpy.qhc"
   34.70 +
   34.71 +latex:
   34.72 +	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
   34.73 +	@echo
   34.74 +	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
   34.75 +	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
   34.76 +	      "run these through (pdf)latex."
   34.77 +
   34.78 +changes:
   34.79 +	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
   34.80 +	@echo
   34.81 +	@echo "The overview file is in $(BUILDDIR)/changes."
   34.82 +
   34.83 +linkcheck:
   34.84 +	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
   34.85 +	@echo
   34.86 +	@echo "Link check complete; look for any errors in the above output " \
   34.87 +	      "or in $(BUILDDIR)/linkcheck/output.txt."
   34.88 +
   34.89 +doctest:
   34.90 +	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
   34.91 +	@echo "Testing of doctests in the sources finished, look at the " \
   34.92 +	      "results in $(BUILDDIR)/doctest/output.txt."
    35.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.2 +++ b/docs/source/allpy/base.rst	Tue Jan 25 16:03:00 2011 +0300
    35.3 @@ -0,0 +1,12 @@
    35.4 +Base Documentation
    35.5 +==================
    35.6 +
    35.7 +This page contains the Base Module documentation.
    35.8 +
    35.9 +The :mod:`base` Module
   35.10 +----------------------
   35.11 +
   35.12 +.. automodule:: allpy.base
   35.13 +    :members:
   35.14 +    :undoc-members:
   35.15 +    :show-inheritance:
    36.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.2 +++ b/docs/source/allpy/data.rst	Tue Jan 25 16:03:00 2011 +0300
    36.3 @@ -0,0 +1,5 @@
    36.4 +Data Documentation
    36.5 +==================
    36.6 +
    36.7 +This page contains the Data Package documentation.
    36.8 +
    37.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.2 +++ b/docs/source/allpy/dna.rst	Tue Jan 25 16:03:00 2011 +0300
    37.3 @@ -0,0 +1,12 @@
    37.4 +Dna Documentation
    37.5 +=================
    37.6 +
    37.7 +This page contains the Dna Module documentation.
    37.8 +
    37.9 +The :mod:`dna` Module
   37.10 +---------------------
   37.11 +
   37.12 +.. automodule:: allpy.dna
   37.13 +    :members:
   37.14 +    :undoc-members:
   37.15 +    :show-inheritance:
    38.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.2 +++ b/docs/source/allpy/fasta.rst	Tue Jan 25 16:03:00 2011 +0300
    38.3 @@ -0,0 +1,12 @@
    38.4 +Fasta Documentation
    38.5 +===================
    38.6 +
    38.7 +This page contains the Fasta Module documentation.
    38.8 +
    38.9 +The :mod:`fasta` Module
   38.10 +-----------------------
   38.11 +
   38.12 +.. automodule:: allpy.fasta
   38.13 +    :members:
   38.14 +    :undoc-members:
   38.15 +    :show-inheritance:
    39.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.2 +++ b/docs/source/allpy/graph.rst	Tue Jan 25 16:03:00 2011 +0300
    39.3 @@ -0,0 +1,12 @@
    39.4 +Graph Documentation
    39.5 +===================
    39.6 +
    39.7 +This page contains the Graph Module documentation.
    39.8 +
    39.9 +The :mod:`graph` Module
   39.10 +-----------------------
   39.11 +
   39.12 +.. automodule:: allpy.graph
   39.13 +    :members:
   39.14 +    :undoc-members:
   39.15 +    :show-inheritance:
    40.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    40.2 +++ b/docs/source/allpy/modules.rst	Tue Jan 25 16:03:00 2011 +0300
    40.3 @@ -0,0 +1,19 @@
    40.4 +Project Documentation
    40.5 +=====================
    40.6 +
    40.7 +This page contains the Project Modules documentation.
    40.8 +
    40.9 +Modules:
   40.10 +--------
   40.11 +
   40.12 +.. toctree::
   40.13 +   :maxdepth: 4
   40.14 +
   40.15 +   base
   40.16 +   data
   40.17 +   dna
   40.18 +   fasta
   40.19 +   graph
   40.20 +   pdb
   40.21 +   protein
   40.22 +   rna
    41.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.2 +++ b/docs/source/allpy/pdb.rst	Tue Jan 25 16:03:00 2011 +0300
    41.3 @@ -0,0 +1,12 @@
    41.4 +Pdb Documentation
    41.5 +=================
    41.6 +
    41.7 +This page contains the Pdb Module documentation.
    41.8 +
    41.9 +The :mod:`pdb` Module
   41.10 +---------------------
   41.11 +
   41.12 +.. automodule:: allpy.pdb
   41.13 +    :members:
   41.14 +    :undoc-members:
   41.15 +    :show-inheritance:
    42.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    42.2 +++ b/docs/source/allpy/protein.rst	Tue Jan 25 16:03:00 2011 +0300
    42.3 @@ -0,0 +1,12 @@
    42.4 +Protein Documentation
    42.5 +=====================
    42.6 +
    42.7 +This page contains the Protein Module documentation.
    42.8 +
    42.9 +The :mod:`protein` Module
   42.10 +-------------------------
   42.11 +
   42.12 +.. automodule:: allpy.protein
   42.13 +    :members:
   42.14 +    :undoc-members:
   42.15 +    :show-inheritance:
    43.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    43.2 +++ b/docs/source/allpy/rna.rst	Tue Jan 25 16:03:00 2011 +0300
    43.3 @@ -0,0 +1,12 @@
    43.4 +Rna Documentation
    43.5 +=================
    43.6 +
    43.7 +This page contains the Rna Module documentation.
    43.8 +
    43.9 +The :mod:`rna` Module
   43.10 +---------------------
   43.11 +
   43.12 +.. automodule:: allpy.rna
   43.13 +    :members:
   43.14 +    :undoc-members:
   43.15 +    :show-inheritance:
    44.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    44.2 +++ b/docs/source/conf.py	Tue Jan 25 16:03:00 2011 +0300
    44.3 @@ -0,0 +1,194 @@
    44.4 +# -*- coding: utf-8 -*-
    44.5 +#
    44.6 +# allpy documentation build configuration file, created by
    44.7 +# sphinx-quickstart on Mon Dec 13 22:14:23 2010.
    44.8 +#
    44.9 +# This file is execfile()d with the current directory set to its containing dir.
   44.10 +#
   44.11 +# Note that not all possible configuration values are present in this
   44.12 +# autogenerated file.
   44.13 +#
   44.14 +# All configuration values have a default; values that are commented out
   44.15 +# serve to show the default.
   44.16 +
   44.17 +import sys, os
   44.18 +
   44.19 +# If extensions (or modules to document with autodoc) are in another directory,
   44.20 +# add these directories to sys.path here. If the directory is relative to the
   44.21 +# documentation root, use os.path.abspath to make it absolute, like shown here.
   44.22 +#sys.path.append(os.path.abspath('.'))
   44.23 +
   44.24 +# -- General configuration -----------------------------------------------------
   44.25 +
   44.26 +# Add any Sphinx extension module names here, as strings. They can be extensions
   44.27 +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
   44.28 +extensions = ['sphinx.ext.autodoc']
   44.29 +
   44.30 +# Add any paths that contain templates here, relative to this directory.
   44.31 +templates_path = ['_templates']
   44.32 +
   44.33 +# The suffix of source filenames.
   44.34 +source_suffix = '.rst'
   44.35 +
   44.36 +# The encoding of source files.
   44.37 +#source_encoding = 'utf-8'
   44.38 +
   44.39 +# The master toctree document.
   44.40 +master_doc = 'index'
   44.41 +
   44.42 +# General information about the project.
   44.43 +project = u'allpy'
   44.44 +copyright = u'2010, many people'
   44.45 +
   44.46 +# The version info for the project you're documenting, acts as replacement for
   44.47 +# |version| and |release|, also used in various other places throughout the
   44.48 +# built documents.
   44.49 +#
   44.50 +# The short X.Y version.
   44.51 +version = '0.0'
   44.52 +# The full version, including alpha/beta/rc tags.
   44.53 +release = '0.0'
   44.54 +
   44.55 +# The language for content autogenerated by Sphinx. Refer to documentation
   44.56 +# for a list of supported languages.
   44.57 +#language = None
   44.58 +
   44.59 +# There are two options for replacing |today|: either, you set today to some
   44.60 +# non-false value, then it is used:
   44.61 +#today = ''
   44.62 +# Else, today_fmt is used as the format for a strftime call.
   44.63 +#today_fmt = '%B %d, %Y'
   44.64 +
   44.65 +# List of documents that shouldn't be included in the build.
   44.66 +#unused_docs = []
   44.67 +
   44.68 +# List of directories, relative to source directory, that shouldn't be searched
   44.69 +# for source files.
   44.70 +exclude_trees = []
   44.71 +
   44.72 +# The reST default role (used for this markup: `text`) to use for all documents.
   44.73 +#default_role = None
   44.74 +
   44.75 +# If true, '()' will be appended to :func: etc. cross-reference text.
   44.76 +#add_function_parentheses = True
   44.77 +
   44.78 +# If true, the current module name will be prepended to all description
   44.79 +# unit titles (such as .. function::).
   44.80 +#add_module_names = True
   44.81 +
   44.82 +# If true, sectionauthor and moduleauthor directives will be shown in the
   44.83 +# output. They are ignored by default.
   44.84 +#show_authors = False
   44.85 +
   44.86 +# The name of the Pygments (syntax highlighting) style to use.
   44.87 +pygments_style = 'sphinx'
   44.88 +
   44.89 +# A list of ignored prefixes for module index sorting.
   44.90 +#modindex_common_prefix = []
   44.91 +
   44.92 +
   44.93 +# -- Options for HTML output ---------------------------------------------------
   44.94 +
   44.95 +# The theme to use for HTML and HTML Help pages.  Major themes that come with
   44.96 +# Sphinx are currently 'default' and 'sphinxdoc'.
   44.97 +html_theme = 'default'
   44.98 +
   44.99 +# Theme options are theme-specific and customize the look and feel of a theme
  44.100 +# further.  For a list of options available for each theme, see the
  44.101 +# documentation.
  44.102 +#html_theme_options = {}
  44.103 +
  44.104 +# Add any paths that contain custom themes here, relative to this directory.
  44.105 +#html_theme_path = []
  44.106 +
  44.107 +# The name for this set of Sphinx documents.  If None, it defaults to
  44.108 +# "<project> v<release> documentation".
  44.109 +#html_title = None
  44.110 +
  44.111 +# A shorter title for the navigation bar.  Default is the same as html_title.
  44.112 +#html_short_title = None
  44.113 +
  44.114 +# The name of an image file (relative to this directory) to place at the top
  44.115 +# of the sidebar.
  44.116 +#html_logo = None
  44.117 +
  44.118 +# The name of an image file (within the static path) to use as favicon of the
  44.119 +# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
  44.120 +# pixels large.
  44.121 +#html_favicon = None
  44.122 +
  44.123 +# Add any paths that contain custom static files (such as style sheets) here,
  44.124 +# relative to this directory. They are copied after the builtin static files,
  44.125 +# so a file named "default.css" will overwrite the builtin "default.css".
  44.126 +html_static_path = ['_static']
  44.127 +
  44.128 +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
  44.129 +# using the given strftime format.
  44.130 +#html_last_updated_fmt = '%b %d, %Y'
  44.131 +
  44.132 +# If true, SmartyPants will be used to convert quotes and dashes to
  44.133 +# typographically correct entities.
  44.134 +#html_use_smartypants = True
  44.135 +
  44.136 +# Custom sidebar templates, maps document names to template names.
  44.137 +#html_sidebars = {}
  44.138 +
  44.139 +# Additional templates that should be rendered to pages, maps page names to
  44.140 +# template names.
  44.141 +#html_additional_pages = {}
  44.142 +
  44.143 +# If false, no module index is generated.
  44.144 +#html_use_modindex = True
  44.145 +
  44.146 +# If false, no index is generated.
  44.147 +#html_use_index = True
  44.148 +
  44.149 +# If true, the index is split into individual pages for each letter.
  44.150 +#html_split_index = False
  44.151 +
  44.152 +# If true, links to the reST sources are added to the pages.
  44.153 +#html_show_sourcelink = True
  44.154 +
  44.155 +# If true, an OpenSearch description file will be output, and all pages will
  44.156 +# contain a <link> tag referring to it.  The value of this option must be the
  44.157 +# base URL from which the finished HTML is served.
  44.158 +#html_use_opensearch = ''
  44.159 +
  44.160 +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
  44.161 +#html_file_suffix = ''
  44.162 +
  44.163 +# Output file base name for HTML help builder.
  44.164 +htmlhelp_basename = 'allpydoc'
  44.165 +
  44.166 +
  44.167 +# -- Options for LaTeX output --------------------------------------------------
  44.168 +
  44.169 +# The paper size ('letter' or 'a4').
  44.170 +#latex_paper_size = 'letter'
  44.171 +
  44.172 +# The font size ('10pt', '11pt' or '12pt').
  44.173 +#latex_font_size = '10pt'
  44.174 +
  44.175 +# Grouping the document tree into LaTeX files. List of tuples
  44.176 +# (source start file, target name, title, author, documentclass [howto/manual]).
  44.177 +latex_documents = [
  44.178 +  ('index', 'allpy.tex', u'allpy Documentation',
  44.179 +   u'many people', 'manual'),
  44.180 +]
  44.181 +
  44.182 +# The name of an image file (relative to this directory) to place at the top of
  44.183 +# the title page.
  44.184 +#latex_logo = None
  44.185 +
  44.186 +# For "manual" documents, if this is true, then toplevel headings are parts,
  44.187 +# not chapters.
  44.188 +#latex_use_parts = False
  44.189 +
  44.190 +# Additional stuff for the LaTeX preamble.
  44.191 +#latex_preamble = ''
  44.192 +
  44.193 +# Documents to append as an appendix to all manuals.
  44.194 +#latex_appendices = []
  44.195 +
  44.196 +# If false, no module index is generated.
  44.197 +#latex_use_modindex = True
    45.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    45.2 +++ b/docs/source/index.rst	Tue Jan 25 16:03:00 2011 +0300
    45.3 @@ -0,0 +1,22 @@
    45.4 +.. allpy documentation master file, created by
    45.5 +   sphinx-quickstart on Mon Dec 13 22:14:23 2010.
    45.6 +   You can adapt this file completely to your liking, but it should at least
    45.7 +   contain the root `toctree` directive.
    45.8 +
    45.9 +Welcome to allpy's documentation!
   45.10 +=================================
   45.11 +
   45.12 +Contents:
   45.13 +
   45.14 +.. toctree::
   45.15 +   :maxdepth: 2
   45.16 +
   45.17 +   allpy/modules
   45.18 +
   45.19 +Indices and tables
   45.20 +==================
   45.21 +
   45.22 +* :ref:`genindex`
   45.23 +* :ref:`modindex`
   45.24 +* :ref:`search`
   45.25 +
    46.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    46.2 +++ b/geometrical_core/geometrical-core	Tue Jan 25 16:03:00 2011 +0300
    46.3 @@ -0,0 +1,181 @@
    46.4 +#!/usr/bin/python
    46.5 +"""
    46.6 +Geometrical Core building tool
    46.7 +version 2.0
    46.8 +"""
    46.9 +
   46.10 +from allpy import config, alignment, block
   46.11 +Block = block.Block
   46.12 +Alignment = alignment.Alignment
   46.13 +import argparse
   46.14 +import os
   46.15 +from tempfile import NamedTemporaryFile
   46.16 +
   46.17 +r = argparse.FileType('r')
   46.18 +w = argparse.FileType('w')
   46.19 +c = config
   46.20 +
   46.21 +def f_nng(string):
   46.22 +    """ Validates nonnegative (>=0) float """
   46.23 +    try:
   46.24 +        value = float(string)
   46.25 +    except:
   46.26 +        msg = "%r is wrong float" % string
   46.27 +        raise argparse.ArgumentTypeError(msg)
   46.28 +    if value < 0:
   46.29 +        msg = "%r is negative" % string
   46.30 +        raise argparse.ArgumentTypeError(msg)
   46.31 +    return value
   46.32 +
   46.33 +def part(string):
   46.34 +    """ Validates 0.0 <= float <= 1.0 """
   46.35 +    try:
   46.36 +        value = float(string)
   46.37 +    except:
   46.38 +        msg = "%r is wrong float" % string
   46.39 +        raise argparse.ArgumentTypeError(msg)
   46.40 +    if not (0.0 <= value <= 1.0):
   46.41 +        msg = "%r is not float, representing part, ie in [0, 1]" % string
   46.42 +        raise argparse.ArgumentTypeError(msg)
   46.43 +    return value
   46.44 +
   46.45 +def timeout(string):
   46.46 +    """ Validates int >= -1 """
   46.47 +    try:
   46.48 +        value = int(string)
   46.49 +    except:
   46.50 +        msg = "%r is wrong integer" % string
   46.51 +        raise argparse.ArgumentTypeError(msg)
   46.52 +    if value < -1:
   46.53 +        msg = "integer %r is less than -1" % string
   46.54 +        raise argparse.ArgumentTypeError(msg)
   46.55 +    return value
   46.56 +
   46.57 +def pos(string):
   46.58 +    """ Validates positive integer """
   46.59 +    try:
   46.60 +        value = int(string)
   46.61 +    except:
   46.62 +        msg = "%r is wrong integer" % string
   46.63 +        raise argparse.ArgumentTypeError(msg)
   46.64 +    if value < 1:
   46.65 +        msg = "%r is not positive integer" % string
   46.66 +        raise argparse.ArgumentTypeError(msg)
   46.67 +    return value
   46.68 +
   46.69 +def i_nng(string):
   46.70 +    """ Validates int >= 0 """
   46.71 +    try:
   46.72 +        value = int(string)
   46.73 +    except:
   46.74 +        msg = "%r is wrong integer" % string
   46.75 +        raise argparse.ArgumentTypeError(msg)
   46.76 +    if value < 0:
   46.77 +        msg = "integer %r is less than 0" % string
   46.78 +        raise argparse.ArgumentTypeError(msg)
   46.79 +    return value
   46.80 +
   46.81 +p = argparse.ArgumentParser(
   46.82 +description='Geometrical Core building tool.',
   46.83 +epilog='''1) Distance spreading [angstrom]
   46.84 +2) -1 timeout means running Bron-Kerbosh algorithm without timeout
   46.85 +3) Alternative core new aa part: read documentation for more information
   46.86 +4) Superposition core identifier: main core is 0, first alternative is 1 etc. ''',
   46.87 +formatter_class=argparse.ArgumentDefaultsHelpFormatter, 
   46.88 +#~ argument_default=argparse.SUPPRESS,
   46.89 +)
   46.90 +
   46.91 +p.add_argument('-v','--version',action='version',version='%(prog)s 2.0')
   46.92 +p.add_argument('-i',help='Input alignment file',metavar='FILE',type=r,required=True)
   46.93 +p.add_argument('-c',help='PDB names conformity file',metavar='FILE',type=r)
   46.94 +p.add_argument('-l',help='Output list file',metavar='FILE',type=w)
   46.95 +p.add_argument('-f',help='Output fasta file',metavar='FILE',type=w)
   46.96 +p.add_argument('-g',help='Output msf file',metavar='FILE',type=w)
   46.97 +p.add_argument('-p',help='Output pdb file',metavar='FILE',type=w)
   46.98 +p.add_argument('-s',help='Output spt file',metavar='FILE',type=w)
   46.99 +p.add_argument('-d',help='Distance spreading',metavar='DELTA',type=f_nng,default=c.delta)
  46.100 +p.add_argument('-m',help='Min core size',metavar='MIN_SIZE',type=pos,default=c.minsize)
  46.101 +p.add_argument('-t',help='Bron-Kerbosh algorithm timeout',type=timeout,default=c.timeout)
  46.102 +p.add_argument('-n',help='Alternative core new aa part',type=part,default=c.ac_new_atoms)
  46.103 +p.add_argument('-a',help='Cores count',metavar='NEW_ATOMS',type=pos,default=c.ac_count)
  46.104 +p.add_argument('-x',help='Superposition core identifier',type=i_nng,default=0)
  46.105 +
  46.106 +tmp_file = None
  46.107 +
  46.108 +try:
  46.109 +    args = p.parse_args()
  46.110 +    
  46.111 +    if not args.l and not args.f and not args.g and not args.p and not args.s:
  46.112 +        print 'Error: no output file provided'
  46.113 +        exit()
  46.114 +    if not (args.p and args.s) and not (not args.p and not args.s):
  46.115 +        print 'Error: provide both pdb and spt file or none of them'
  46.116 +        exit()
  46.117 +    
  46.118 +    try:
  46.119 +        alignment = Alignment(args.i)
  46.120 +    except:
  46.121 +        args.i.close()
  46.122 +        tmp_file = NamedTemporaryFile(delete=False)
  46.123 +        tmp_file.close()
  46.124 +        os.system('seqret %(msf)s %(fasta)s' % \
  46.125 +        {'msf': args.i.name, 'fasta': tmp_file.name})
  46.126 +        args.i = open(tmp_file.name)
  46.127 +        alignment = Alignment(args.i)
  46.128 +    
  46.129 +    block = Block(alignment)
  46.130 +    GCs = block.geometrical_cores(max_delta=args.d, timeout=args.t, 
  46.131 +        minsize=args.t, ac_new_atoms=args.n, ac_count=args.a)
  46.132 +    
  46.133 +    if not GCs:
  46.134 +        print 'No cores! Try to change parameters'
  46.135 +        exit()
  46.136 +    
  46.137 +    if args.l:
  46.138 +        l = args.l
  46.139 +        
  46.140 +        l.write('Geometrical core positions for alignment %s' % args.i.name)
  46.141 +        l.write('\n\n')
  46.142 +        l.write('First alignment position is 0')
  46.143 +        
  46.144 +        for i, GC in enumerate(GCs):
  46.145 +            l.write('\n\n')
  46.146 +            if i == 0:
  46.147 +                l.write('Geometrical core:')
  46.148 +            else:
  46.149 +                l.write('Alternative geometrical core %i:' % i)
  46.150 +            l.write('\n')
  46.151 +            l.write(', '.join(str(n) for n in GC.positions))
  46.152 +        l.close()
  46.153 +    
  46.154 +    if args.g and not args.f:
  46.155 +        args.f = args.g
  46.156 +    
  46.157 +    if args.f:
  46.158 +        args.i.seek(0)
  46.159 +        f = args.f
  46.160 +        f.write(args.i.read()) # write sequences
  46.161 +        
  46.162 +        # write GCs
  46.163 +        for i, GC in enumerate(GCs):
  46.164 +            f.write('\n\n')
  46.165 +            if i == 0:
  46.166 +                GC.save_xstring(f, 'GC', 'Main geometrical core')
  46.167 +            else:
  46.168 +                GC.save_xstring(f, 'AGC_%i' % i, 'Alternative geometrical core %i' % i)
  46.169 +        f.close()
  46.170 +
  46.171 +        
  46.172 +    if args.g:
  46.173 +        args.g.close()
  46.174 +        os.system('seqret %(fasta)s msf::%(msf)s' % \
  46.175 +        {'fasta': args.f.name, 'msf': args.g.name})
  46.176 +    
  46.177 +    
  46.178 +except Exception, t:
  46.179 +    print t
  46.180 +    exit()
  46.181 +
  46.182 +if tmp_file:
  46.183 +    os.unlink(tmp_file)
  46.184 +
    47.1 --- a/geometrical_core/geometrical_core.py	Mon Jan 24 21:40:10 2011 +0300
    47.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    47.3 @@ -1,181 +0,0 @@
    47.4 -"""
    47.5 -Geometrical Core building tool
    47.6 -version 2.0
    47.7 -"""
    47.8 -
    47.9 -from allpy.lib import config, project, block
   47.10 -Block = block.Block
   47.11 -Project = project.Project
   47.12 -import argparse
   47.13 -import os
   47.14 -from tempfile import NamedTemporaryFile
   47.15 -
   47.16 -r = argparse.FileType('r')
   47.17 -w = argparse.FileType('w')
   47.18 -c = config
   47.19 -
   47.20 -def f_nng(string):
   47.21 -    """ Validates nonnegative (>=0) float """
   47.22 -    try:
   47.23 -        value = float(string)
   47.24 -    except:
   47.25 -        msg = "%r is wrong float" % string
   47.26 -        raise argparse.ArgumentTypeError(msg)
   47.27 -    if value < 0:
   47.28 -        msg = "%r is negative" % string
   47.29 -        raise argparse.ArgumentTypeError(msg)
   47.30 -    return value
   47.31 -
   47.32 -def part(string):
   47.33 -    """ Validates 0.0 <= float <= 1.0 """
   47.34 -    try:
   47.35 -        value = float(string)
   47.36 -    except:
   47.37 -        msg = "%r is wrong float" % string
   47.38 -        raise argparse.ArgumentTypeError(msg)
   47.39 -    if not (0.0 <= value <= 1.0):
   47.40 -        msg = "%r is not float, representing part, ie in [0, 1]" % string
   47.41 -        raise argparse.ArgumentTypeError(msg)
   47.42 -    return value
   47.43 -
   47.44 -def timeout(string):
   47.45 -    """ Validates int >= -1 """
   47.46 -    try:
   47.47 -        value = int(string)
   47.48 -    except:
   47.49 -        msg = "%r is wrong integer" % string
   47.50 -        raise argparse.ArgumentTypeError(msg)
   47.51 -    if value < -1:
   47.52 -        msg = "integer %r is less than -1" % string
   47.53 -        raise argparse.ArgumentTypeError(msg)
   47.54 -    return value
   47.55 -
   47.56 -def pos(string):
   47.57 -    """ Validates positive integer """
   47.58 -    try:
   47.59 -        value = int(string)
   47.60 -    except:
   47.61 -        msg = "%r is wrong integer" % string
   47.62 -        raise argparse.ArgumentTypeError(msg)
   47.63 -    if value < 1:
   47.64 -        msg = "%r is not positive integer" % string
   47.65 -        raise argparse.ArgumentTypeError(msg)
   47.66 -    return value
   47.67 -
   47.68 -def i_nng(string):
   47.69 -    """ Validates int >= 0 """
   47.70 -    try:
   47.71 -        value = int(string)
   47.72 -    except:
   47.73 -        msg = "%r is wrong integer" % string
   47.74 -        raise argparse.ArgumentTypeError(msg)
   47.75 -    if value < 0:
   47.76 -        msg = "integer %r is less than 0" % string
   47.77 -        raise argparse.ArgumentTypeError(msg)
   47.78 -    return value
   47.79 -
   47.80 -p = argparse.ArgumentParser(
   47.81 -description='Geometrical Core building tool.',
   47.82 -epilog='''1) Distance spreading [angstrom]
   47.83 -2) -1 timeout means running Bron-Kerbosh algorithm without timeout
   47.84 -3) Alternative core new aa part: read documentation for more information
   47.85 -4) Superposition core identifier: main core is 0, first alternative is 1 etc. ''',
   47.86 -formatter_class=argparse.ArgumentDefaultsHelpFormatter, 
   47.87 -#~ argument_default=argparse.SUPPRESS,
   47.88 -)
   47.89 -
   47.90 -p.add_argument('-v','--version',action='version',version='%(prog)s 2.0')
   47.91 -p.add_argument('-i',help='Input alignment file',metavar='FILE',type=r,required=True)
   47.92 -p.add_argument('-c',help='PDB names conformity file',metavar='FILE',type=r)
   47.93 -p.add_argument('-l',help='Output list file',metavar='FILE',type=w)
   47.94 -p.add_argument('-f',help='Output fasta file',metavar='FILE',type=w)
   47.95 -p.add_argument('-g',help='Output msf file',metavar='FILE',type=w)
   47.96 -p.add_argument('-p',help='Output pdb file',metavar='FILE',type=w)
   47.97 -p.add_argument('-s',help='Output spt file',metavar='FILE',type=w)
   47.98 -p.add_argument('-d',help='Distance spreading',metavar='DELTA',type=f_nng,default=c.delta)
   47.99 -p.add_argument('-m',help='Min core size',metavar='MIN_SIZE',type=pos,default=c.minsize)
  47.100 -p.add_argument('-t',help='Bron-Kerbosh algorithm timeout',type=timeout,default=c.timeout)
  47.101 -p.add_argument('-n',help='Alternative core new aa part',type=part,default=c.ac_new_atoms)
  47.102 -p.add_argument('-a',help='Cores count',metavar='NEW_ATOMS',type=pos,default=c.ac_count)
  47.103 -p.add_argument('-x',help='Superposition core identifier',type=i_nng,default=0)
  47.104 -
  47.105 -tmp_file = None
  47.106 -
  47.107 -try:
  47.108 -    args = p.parse_args()
  47.109 -    
  47.110 -    if not args.l and not args.f and not args.g and not args.p and not args.s:
  47.111 -        print 'Error: no output file provided'
  47.112 -        exit()
  47.113 -    if not (args.p and args.s) and not (not args.p and not args.s):
  47.114 -        print 'Error: provide both pdb and spt file or none of them'
  47.115 -        exit()
  47.116 -    
  47.117 -    try:
  47.118 -        project = Project(args.i)
  47.119 -    except:
  47.120 -        args.i.close()
  47.121 -        tmp_file = NamedTemporaryFile(delete=False)
  47.122 -        tmp_file.close()
  47.123 -        os.system('seqret %(msf)s %(fasta)s' % \
  47.124 -        {'msf': args.i.name, 'fasta': tmp_file.name})
  47.125 -        args.i = open(tmp_file.name)
  47.126 -        project = Project(args.i)
  47.127 -    
  47.128 -    project.pdb_auto_add(args.c)
  47.129 -    block = Block(project)
  47.130 -    GCs = block.geometrical_cores(max_delta=args.d, timeout=args.t, 
  47.131 -        minsize=args.t, ac_new_atoms=args.n, ac_count=args.a)
  47.132 -    
  47.133 -    if not GCs:
  47.134 -        print 'No cores! Try to change parameters'
  47.135 -        exit()
  47.136 -    
  47.137 -    if args.l:
  47.138 -        l = args.l
  47.139 -        
  47.140 -        l.write('Geometrical core positions for alignment %s' % args.i.name)
  47.141 -        l.write('\n\n')
  47.142 -        l.write('First alignment position is 0')
  47.143 -        
  47.144 -        for i, GC in enumerate(GCs):
  47.145 -            l.write('\n\n')
  47.146 -            if i == 0:
  47.147 -                l.write('Geometrical core:')
  47.148 -            else:
  47.149 -                l.write('Alternative geometrical core %i:' % i)
  47.150 -            l.write('\n')
  47.151 -            l.write(', '.join(str(n) for n in GC.positions))
  47.152 -        l.close()
  47.153 -    
  47.154 -    if args.g and not args.f:
  47.155 -        args.f = args.g
  47.156 -    
  47.157 -    if args.f:
  47.158 -        args.i.seek(0)
  47.159 -        f = args.f
  47.160 -        f.write(args.i.read()) # write sequences
  47.161 -        
  47.162 -        # write GCs
  47.163 -        for i, GC in enumerate(GCs):
  47.164 -            f.write('\n\n')
  47.165 -            if i == 0:
  47.166 -                GC.save_xstring(f, 'GC', 'Main geometrical core')
  47.167 -            else:
  47.168 -                GC.save_xstring(f, 'AGC_%i' % i, 'Alternative geometrical core %i' % i)
  47.169 -        f.close()
  47.170 -
  47.171 -        
  47.172 -    if args.g:
  47.173 -        args.g.close()
  47.174 -        os.system('seqret %(fasta)s msf::%(msf)s' % \
  47.175 -        {'fasta': args.f.name, 'msf': args.g.name})
  47.176 -    
  47.177 -    
  47.178 -except Exception, t:
  47.179 -    print t
  47.180 -    exit()
  47.181 -
  47.182 -if tmp_file:
  47.183 -    os.unlink(tmp_file)
  47.184 -
    48.1 --- a/lib/allpy_data/AAdict.py	Mon Jan 24 21:40:10 2011 +0300
    48.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    48.3 @@ -1,1200 +0,0 @@
    48.4 -
    48.5 -"""
    48.6 -Dict:
    48.7 -	key: 3-letter code from PDB
    48.8 -	value: (1-letter code, type, is_modified, None)
    48.9 -		type:
   48.10 -			p - protein
   48.11 -			d - dna
   48.12 -			r - rna
   48.13 -		is_modified: "M" or None
   48.14 -        None
   48.15 -        name
   48.16 -"""
   48.17 -
   48.18 -AAdict = {
   48.19 -"CPI": ('x', 'p', None, None, '6-CARBOXYPIPERIDINE'),
   48.20 -"AGM": ('r', 'p', 'M', None, '5-METHYL-ARGININE'),
   48.21 -"G25": ('g', 'r', 'M', None, '"GUANOSINE-5\'-MONOPHOSPHATE"'),
   48.22 -"CPN": ('c', 'd', 'M', None, '2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-CYTOSINE'),
   48.23 -"MA6": ('a', 'r', 'M', None, '"6N-DIMETHYLADENOSINE-5\'-MONOPHOSHATE"'),
   48.24 -"NYS": ('c', 'p', 'M', None, 'S-{5-[(1R)-2-AMINO-1-HYDROXYETHYL]-2,3-DIHYDROXYPHENYL}-L-CYSTEINE'),
   48.25 -"CPC": ('x', 'p', None, None, '"2-METHYL-1-METHYLAMINO-CYCLOPROPANE'),
   48.26 -"8FG": ('g', 'd', 'M', None, '"N-(5\'-PHOSPHO-2\'-DEOXYGUANOSIN-8-YL)-2-ACETYLAMINOFLUORENE"'),
   48.27 -"SIC": ('x', 'p', None, None, '"(2R)-2-[(3S)-3-AMINO-2,5-DIOXOPYRROLIDIN-1-YL]-3-SULFANYLPROPANOIC'),
   48.28 -"PQ1": ('n', 'r', None, None, '"PHOSPHORIC'),
   48.29 -"PAS": ('d', 'p', 'M', None, '"2-AMINO-4-OXO-4-PHOSPHONOOXY-BUTYRIC'),
   48.30 -"EOV": ('x', 'p', None, None, '(3S,6S,9S,12R,15S,18S,21S,24S,30S,33S)-30-ethyl-33-[(1R,2R,4E)-1-hydroxy-2-methylhex-4-en-1-yl]-1,4,7,10,12,15,19,25,28-nonamethyl-3,21-bis(1-methylethyl)-6,9,18,24-tetrakis(2-methylpropyl)-1,4,7,10,13,16,19,22,25,28,31-undecaazacyclotritriacontane-2,5,8,11,14,17,20,23,26,29,32-undecone'),
   48.31 -"B2I": ('i', 'p', 'M', None, '"ISOLEUCINE'),
   48.32 -"NYM": ('n', 'd', None, None, "3'-DEOXY-3'-AMINOTHYMIDINE MONOPHOSPHATE"),
   48.33 -"URD": ('u', 'r', 'M', None, '1-((2R,3R,4S,5R)-TETRAHYDRO-3,4-DIHYDROXY-5-(HYDROXYMETHYL)FURAN-2-YL)PYRIDINE-2,4(1H,3H)-DIONE'),
   48.34 -"NYC": ('x', 'p', None, None, '"[(4E)-2-[(1R,2S)-1-AMINO-2-HYDROXYPROPYL]-4-(1H-INDOL-3-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
   48.35 -"AGT": ('x', 'p', None, None, 'S-{(S)-AMINO[(4-AMINOBUTYL)AMINO]METHYL}-L-CYSTEINE'),
   48.36 -"NYG": ('x', 'p', None, None, '"[(4Z)-2-[(1S)-1,3-DIAMINO-3-OXOPROPYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
   48.37 -"CPV": ('x', 'p', None, None, '"5-AMINO-6-CYCLOHEXYL-4-HYDROXY-2-ISOPROPYL-HEXANOIC'),
   48.38 -"TAV": ('d', 'p', 'M', None, '"N-METHYL-N-{2-[(2-NAPHTHYLSULFONYL)AMINO]-5-[(2-NAPHTHYLSULFONYL)OXY]BENZOYL}-L-ASPARTIC'),
   48.39 -"S4G": ('g', 'd', 'M', None, "2'-deoxy-4'-thioguanosine 5'-(dihydrogen phosphate)"),
   48.40 -"MA7": ('a', 'd', 'M', None, '"1N-METHYLADENOSINE-5\'-MONOPHOSPHATE"'),
   48.41 -"S4C": ('c', 'r', 'M', None, '"4\'-THIO-4\'-DEOXY-CYTOSINE-5\'-MONOPHOSPHATE"'),
   48.42 -"UAR": ('u', 'r', 'M', None, "URACIL ARABINOSE-5'-PHOSPHATE"),
   48.43 -"SPT": ('t', 'd', 'M', None, "5'-THIO-THYMIDINE PHOSPHONIC ACID"),
   48.44 -"S4U": ('u', 'r', 'M', None, '"4-THIOURIDINE-5\'-PHOSPHATE"'),
   48.45 -"TAF": ('t', 'd', 'M', None, "2'-DEOXY-2'-FLUORO-ARABINO-FURANOSYL THYMINE-5'-PHOSPHATE"),
   48.46 -"5NC": ('c', 'd', 'M', None, '"5-AZA-CYTIDINE-5\'MONOPHOSPHATE"'),
   48.47 -"CSF": ('c', 'r', 'M', None, "CYTIDINE-5'-MONOPHOSPHATE-3-FLUORO-N-ACETYL-NEURAMINIC ACID"),
   48.48 -"0CS": ('a', 'p', 'M', None, '3-[(S)-HYDROPEROXYSULFINYL]-L-ALANINE'),
   48.49 -"B2V": ('v', 'p', 'M', None, '"VALINE'),
   48.50 -"TAL": ('n', 'r', None, None, '9-(6-DEOXY-ALPHA-L-TALOFURANOSYL)-6-METHYLPURINE'),
   48.51 -"SCY": ('c', 'p', 'M', None, 'S-ACETYL-CYSTEINE'),
   48.52 -"KYQ": ('k', 'p', 'M', None, '(E)-N~6~-(1-CARBOXY-2-HYDROXYETHYLIDENE)-L-LYSINE'),
   48.53 -"CS8": ('n', 'r', None, None, '"3-THIAOCTANOYL-COENZYME'),
   48.54 -"TA4": ('x', 'p', None, None, '"(S)-2-[4-(AMINOMETHYL)-1H-1,2,3-TRIAZOL-1-YL]-4-METHYLPENTANOIC'),
   48.55 -"ALN": ('a', 'p', 'M', None, 'NAPHTHALEN-2-YL-3-ALANINE'),
   48.56 -"GS": ('g', 'd', 'M', None, '"GUANOSINE-5\'-THIO-MONOPHOSPHATE"'),
   48.57 -"2FE": ('n', 'd', None, None, '"2\'-FLUORO-2\'-DEOXY-1,N6-ETHENOADENINE"'),
   48.58 -"VAL": ('V', 'p', None, None, 'VALINE'),
   48.59 -"7DA": ('a', 'd', 'M', None, '"7-DEAZA-2\'-DEOXYADENOSINE-5\'-MONOPHOSPHATE"'),
   48.60 -"XPC": ('x', 'p', None, None, '"(3S,4R)-4-AMINOPYRROLIDINE-3-CARBOXYLIC'),
   48.61 -"VAF": ('v', 'p', 'M', None, 'METHYLVALINE'),
   48.62 -"VAD": ('v', 'p', 'M', None, 'DEAMINOHYDROXYVALINE'),
   48.63 -"BGM": ('g', 'd', 'M', None, '"8-BROMO-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'),
   48.64 -"P": ('g', 'd', 'M', None, "2'-DEOXY-N1,N2-PROPANO GUANOSINE MONOPHOSPHATE"),
   48.65 -"LHU": ('u', 'r', 'M', None, '"L-URIDINE-5\'-MONOPHOSPHATE"'),
   48.66 -"T48": ('t', 'd', 'M', None, "6'-ALPHA-HYDROXY CARBOCYCLIC THYMIDINE 5'-MONOPHOSPHATE"),
   48.67 -"2GT": ('t', 'd', 'M', None, "2'-O-PROPARGYL THYMIDINE-5'-MONOPHOSPHATE"),
   48.68 -"HMF": ('a', 'p', 'M', None, '"2-AMINO-4-PHENYL-BUTYRIC'),
   48.69 -"BNN": ('a', 'p', 'M', None, 'ACETYL-P-AMIDINOPHENYLALANINE'),
   48.70 -"AHB": ('n', 'p', 'M', None, 'BETA-HYDROXYASPARAGINE'),
   48.71 -"NPH": ('c', 'p', 'M', None, 'CYSTEINE-METHYLENE-CARBAMOYL-1,10-PHENANTHROLINE'),
   48.72 -"SIB": ('c', 'p', 'M', None, '"(2S)-2-AMINO-4-({[(2S,3S,4R,5R)-3,4-DIHYDROXY-5-(6-OXO-1,6-DIHYDRO-9H-PURIN-9-YL)TETRAHYDROFURAN-2-YL]METHYL}THIO)BUTANOIC'),
   48.73 -"SNN": ('n', 'p', 'M', None, 'L-3-AMINOSUCCINIMIDE'),
   48.74 -"AHH": ('x', 'p', None, None, '"AMINO-HYDROXYHEPTANOIC'),
   48.75 -"JW5": ('n', 'r', None, None, "6-(HYDROXYMETHYL)URIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
   48.76 -"PBB": ('c', 'p', 'M', None, 'S-(4-BROMOBENZYL)CYSTEINE'),
   48.77 -"PTR": ('y', 'p', 'M', None, 'O-PHOSPHOTYROSINE'),
   48.78 -"MAA": ('a', 'p', 'M', None, 'N-METHYLALANINE'),
   48.79 -"AHS": ('x', 'p', None, None, '"(3-AMINO-4-CYCLOHEXYL-2-HYDROXY-BUTYL)-ISOBUTYL-CARBAMIC'),
   48.80 -"AHP": ('a', 'p', 'M', None, '"2-AMINO-HEPTANOIC'),
   48.81 -"MAD": ('a', 'r', 'M', None, '"6-HYDRO-1-METHYLADENOSINE-5\'-MONOPHOSPHATE"'),
   48.82 -"AHT": ('x', 'p', None, None, '4-(2,5-DIAMINO-5-HYDROXY-PENTYL)-PHENOL'),
   48.83 -"XYG": ('x', 'p', None, None, '"[(4Z)-2-[(1Z)-ETHANIMIDOYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
   48.84 -"MAI": ('r', 'p', 'M', None, 'DEOXO-METHYLARGININE'),
   48.85 -"CP1": ('c', 'd', 'M', None, '2-(METHYLAMINO)-ETHYLGLYCINE-CARBONYLMETHYLENE-CYTOSINE'),
   48.86 -"G2L": ('g', 'r', 'M', None, '"3\'-O-METHYOXYETHYL-GUANOSINE-5\'-MONOPHOSPHATE"'),
   48.87 -"FFD": ('n', 'd', None, None, '(1R)-1,4-ANHYDRO-2-DEOXY-1-(3-FLUOROPHENYL)-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL'),
   48.88 -"CML": ('c', 'p', 'M', None, '"(2S)-2-{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}BUTANEDIOIC'),
   48.89 -"CMH": ('c', 'p', 'M', None, 'S-(METHYLMERCURY)-L-CYSTEINE'),
   48.90 -"ZDU": ('n', 'd', None, None, '"5-(3-AMINOPROPYL)-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'),
   48.91 -"PCS": ('f', 'p', 'M', None, 'PHENYLALANYLMETHYLCHLORIDE'),
   48.92 -"CME": ('c', 'p', 'M', None, 'S,S-(2-HYDROXYETHYL)THIOCYSTEINE'),
   48.93 -"CSO": ('c', 'p', 'M', None, 'S-HYDROXYCYSTEINE'),
   48.94 -"193": ('x', 'p', None, None, '"(2S)-4-(BETA-ALANYLAMINO)-2-AMINOBUTANOIC'),
   48.95 -"1TY": ('y', 'p', 'M', None, '3-{(3E)-4-HYDROXY-6-OXO-3-[(2-PHENYLETHYL)IMINO]CYCLOHEXA-1,4-DIEN-1-YL}ALANINE'),
   48.96 -"CB2": ('c', 'd', 'M', None, '"PHOSPHORIC'),
   48.97 -"CS3": ('c', 'p', 'M', None, 'S-[3-OXO-3-(2-THIENYL)PROPYL]-L-CYSTEINE'),
   48.98 -"64T": ('t', 'd', 'M', None, '"5-HYDROXY-THYMIDINE-5\'-MONOPHOSPHATE"'),
   48.99 -"4AC": ('n', 'r', None, None, '"N(4)-ACETYLCYTIDINE-5\'-MONOPHOSPHATE"'),
  48.100 -"B3X": ('n', 'p', 'M', None, '"(3S)-3,5-DIAMINO-5-OXOPENTANOIC'),
  48.101 -"TST": ('x', 'p', None, None, '"4-METHYL-2-{[4-(TOLUENE-4-SULFONYL)-THIOMORPHOLINE-3-CARBONYL]-AMINO}-PENTANOIC'),
  48.102 -"CMT": ('c', 'p', 'M', None, 'O-METHYLCYSTEINE'),
  48.103 -"CMR": ('c', 'd', 'M', None, '"2\'-DEOXY-CYTIDINE-5\'-RP-MONOMETHYLPHOSPHONATE"'),
  48.104 -"CSH": ('x', 'p', None, None, '"[2-(2-HYDROXY-1-METHYL-ETHYL)-4-(1H-IMIDAZOL-4-YLMETHYL)-5-OXO-IMIDAZOLIDIN-1-YL]-ACETIC'),
  48.105 -"VLM": ('x', 'p', None, None, 'VALINYLAMINE'),
  48.106 -"G": ('G', 'r', None, None, '"GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.107 -"C4S": ('c', 'd', 'M', None, "2'-deoxy-4'-thiocytidine 5'-(dihydrogen phosphate)"),
  48.108 -"U2L": ('u', 'r', 'M', None, "5-METHYL-3'-O-METHOXYETHYL URIDINE-5'-MONOPHOSPHATE"),
  48.109 -"LSO": ('x', 'p', None, None, '(Z)-N~6~-(3-CARBOXY-1-{[(4-CARBOXY-2-OXOBUTYL)SULFONYL]METHYL}PROPYLIDENE)-L-LYSINE'),
  48.110 -"U2N": ('n', 'd', None, None, "2'-AMINO-2'-DEOXYURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
  48.111 -"PLE": ('l', 'p', 'M', None, '"LEUCINE'),
  48.112 -"SIN": ('x', 'p', None, None, '"SUCCINIC'),
  48.113 -"M2L": ('k', 'p', 'M', None, '"(2R)-2-AMINO-3-(2-DIMETHYLAMINOETHYLSULFANYL)PROPANOIC'),
  48.114 -"I58": ('k', 'p', 'M', None, '4R-FLUORO-N6-ETHANIMIDOYL-L-LYSINE'),
  48.115 -"U2P": ('u', 'r', 'M', None, '"PHOSPHORIC'),
  48.116 -"B2A": ('a', 'p', 'M', None, '"ALANINE'),
  48.117 -"TTM": ('t', 'd', 'M', None, '"N3-ETHYL-THYMIDINE-5\'-MONOPHOSPHATE"'),
  48.118 -"CSD": ('c', 'p', 'M', None, '3-SULFINOALANINE'),
  48.119 -"M2G": ('g', 'r', 'M', None, '"N2-DIMETHYLGUANOSINE-5\'-MONOPHOSPHATE"'),
  48.120 -"ASX": ('x', 'p', None, None, '"ASP/ASN'),
  48.121 -"AFF": ('n', 'd', None, None, '2-ACETYLAMINOFLUORENE-3-YL'),
  48.122 -"TTD": ('t', 'd', 'M', None, '"CIS-SYN'),
  48.123 -"AET": ('a', 'r', 'M', None, '"N-[N-(9-B-D-RIBOFURANOSYLPURIN-6-YL)METHYLCARBAMOYL]THREONINE-5\'-MONOPHOSPHATE"'),
  48.124 -"C45": ('c', 'd', 'M', None, '"N4-METHOXY-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.125 -"C46": ('c', 'd', 'M', None, '"6H,8H-3,4-DIHYDROPYRIMIDO[4,5-C][1,2]OXAZIN-7-0NE(CYTIDINE)-5\'-MONOPHOSPHATE"'),
  48.126 -"FTR": ('w', 'p', 'M', None, 'FLUOROTRYPTOPHANE'),
  48.127 -"BLY": ('k', 'p', 'M', None, '"LYSINE'),
  48.128 -"C42": ('c', 'd', 'M', None, '"3\'-AMINO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.129 -"C2S": ('c', 'd', 'M', None, '"CYTIDINE-5\'-DITHIOPHOSPHORATE"'),
  48.130 -"FTY": ('y', 'p', 'M', None, 'DEOXY-DIFLUOROMETHELENE-PHOSPHOTYROSINE'),
  48.131 -"CSA": ('c', 'p', 'M', None, 'S-ACETONYLCYSTEINE'),
  48.132 -"I5C": ('c', 'd', 'M', None, '"5-IODO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.133 -"ALY": ('k', 'p', 'M', None, 'N(6)-ACETYLLYSINE'),
  48.134 -"P1L": ('c', 'p', 'M', None, 'S-PALMITOYL-L-CYSTEINE'),
  48.135 -"C49": ('c', 'd', 'M', None, '"4-THIO,5-FLUORO,5-METHYL-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.136 -"DSP": ('d', 'p', 'M', None, '"D-ASPARTIC'),
  48.137 -"AFA": ('n', 'p', 'M', None, 'N-[7-METHYL-OCT-2,4-DIENOYL]ASPARAGINE'),
  48.138 -"P1P": ('n', 'r', None, None, '3-{[OXIDO(OXO)PHOSPHINO]OXY}PROPAN-1-OLATE'),
  48.139 -"U25": ('u', 'r', 'M', None, '"URIDINE-5\'-MONOPHOSPHATE"'),
  48.140 -"CHS": ('x', 'p', None, None, '"4-AMINO-5-CYCLOHEXYL-3-HYDROXY-PENTANOIC'),
  48.141 -"BLE": ('l', 'p', 'M', None, '"LEUCINE'),
  48.142 -"DSG": ('n', 'p', 'M', None, 'D-ASPARAGINE'),
  48.143 -"DSE": ('s', 'p', 'M', None, 'N-METHYL-D-SERINE'),
  48.144 -"S4A": ('a', 'd', 'M', None, "2'-deoxy-4'-thioadenosine 5'-(dihydrogen phosphate)"),
  48.145 -"DLS": ('k', 'p', 'M', None, 'DI-ACETYL-LYSINE'),
  48.146 -"4SU": ('u', 'r', 'M', None, '"4-THIOURIDINE-5\'-MONOPHOSPHATE"'),
  48.147 -"MA": ('a', 'p', 'M', None, '"METHYL'),
  48.148 -"TRX": ('w', 'p', 'M', None, '6-HYDROXYTRYPTOPHAN'),
  48.149 -"DC": ('C', 'd', None, None, '"2\'-DEOXYCYTIDINE-5\'-MONOPHOSPHATE"'),
  48.150 -"HLU": ('l', 'p', 'M', None, 'BETA-HYDROXYLEUCINE'),
  48.151 -"HSE": ('s', 'p', 'M', None, 'L-HOMOSERINE'),
  48.152 -"A5M": ('c', 'r', 'M', None, '"2\'-AMINE-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.153 -"ALS": ('a', 'p', 'M', None, '"2-AMINO-3-OXO-4-SULFO-BUTYRIC'),
  48.154 -"CBR": ('c', 'd', 'M', None, '"5-BROMO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.155 -"SHC": ('c', 'p', 'M', None, 'S-HEXYLCYSTEINE'),
  48.156 -"B2F": ('f', 'p', 'M', None, '"PHENYLALANINE'),
  48.157 -"QUO": ('g', 'r', 'M', None, '"2-AMINO-7-DEAZA-(2\'\',3\'\'-DIHYDROXY-CYCLOPENTYLAMINO)-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.158 -"X4A": ('n', 'd', None, None, '"[(2R,3S,5S)-2,3,5-TRIHYDROXYTETRAHYDROFURAN-2-YL]METHYL'),
  48.159 -"BE2": ('x', 'p', None, None, '"2-AMINOBENZOIC'),
  48.160 -"CM0": ('n', 'r', None, None, "5-(CARBOXYMETHOXY) URIDINE-5'-MONOPHOSPHATE"),
  48.161 -"GLH": ('q', 'p', 'M', None, 'N-5-CYCLOHEXYL-N-5-[(CYCLOHEXYLAMINO)CARBONYL]GLUTAMINE'),
  48.162 -"DOH": ('d', 'p', 'M', None, '"BETA-HYDROXY'),
  48.163 -"DON": ('l', 'p', 'M', None, '6-DIAZENYL-5-OXO-L-NORLEUCINE'),
  48.164 -"GLM": ('x', 'p', None, None, '1-AMINO-PROPAN-2-ONE'),
  48.165 -"GLN": ('Q', 'p', None, None, 'GLUTAMINE'),
  48.166 -"T4S": ('t', 'd', 'M', None, '1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-4-HYDROSELENO-5-METHYLPYRIMIDIN-2(1H)-ONE'),
  48.167 -"DOC": ('c', 'd', 'M', None, '"2\',3\'-DIDEOXYCYTIDINE-5\'-MONOPHOSPHATE"'),
  48.168 -"T3P": ('t', 'd', 'M', None, '"THYMIDINE-3\'-PHOSPHATE"'),
  48.169 -"DOA": ('x', 'p', None, None, '"12-AMINO-DODECANOIC'),
  48.170 -"TOX": ('w', 'p', 'M', None, '2-AMINO-3-(1-HYDROPEROXY-1H-INDOL-3-YL)PROPAN-1-OL'),
  48.171 -"SMP": ('a', 'd', 'M', None, '"2\'-DEOXY-ADENOSINE-5\'-SP-MONOMETHYLPHOSPHONATE"'),
  48.172 -"GLX": ('x', 'p', None, None, '"GLU/GLN'),
  48.173 -"GLY": ('G', 'p', None, None, 'GLYCINE'),
  48.174 -"GLZ": ('g', 'p', 'M', None, 'AMINO-ACETALDEHYDE'),
  48.175 -"CTG": ('t', 'd', 'M', None, '"(5R,6S)-5,6-DIHYDRO-5,6-DIHYDROXYTHYMIDINE-5\'-MONOPHOSPHATE"'),
  48.176 -"T38": ('n', 'r', None, None, "2'-O-ETHOXYMETHYLENE THYMIDINE 5'-MONOPHOSPHATE"),
  48.177 -"AKL": ('d', 'p', 'M', None, '"3-AMINO-5-CHLORO-4-OXOPENTANOIC'),
  48.178 -"T36": ('t', 'd', 'M', None, "SPLIT LINKAGE THYMIDINE 5'-MONOPHOSPHATE"),
  48.179 -"T37": ('t', 'd', 'M', None, "3'-AMINO-2'DEOXYTHYMIDINE 5'-MONOPHOSPHATE"),
  48.180 -"4TA": ('n', 'r', None, None, '"P1-(5\'-ADENOSYL)P4-(5\'-(2\'-DEOXY-THYMIDYL))TETRAPHOSPHATE"'),
  48.181 -"CTH": ('t', 'p', 'M', None, '4-CHLOROTHREONINE'),
  48.182 -"GLU": ('E', 'p', None, None, '"GLUTAMIC'),
  48.183 -"RCY": ('c', 'p', 'M', None, '"S-[(3S,3\'R)-1\'-HYDROXY-2\',2\',5\',5\'-TETRAMETHYL-2,5-DIOXO-1,3\'-BIPYRROLIDIN-3-YL]-L-CYSTEINE"'),
  48.184 -"MDH": ('x', 'p', None, None, 'N-METHYLDEHYDROBUTYRINE'),
  48.185 -"MDO": ('x', 'p', None, None, '"{2-[(1S)-1-AMINOETHYL]-5-HYDROXY-4-METHYL-1H-IMIDAZOL-1-YL}ACETIC'),
  48.186 -"8OG": ('g', 'd', 'M', None, '"8-OXO-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.187 -"HCS": ('x', 'p', None, None, '"2-AMINO-4-MERCAPTO-BUTYRIC'),
  48.188 -"UD5": ('u', 'r', 'M', None, '5-FLUOROURIDINE'),
  48.189 -"O2C": ('n', 'd', None, None, '"3\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.190 -"5FU": ('u', 'r', 'M', None, '"5-FLUORO-URIDINE-5\'-MONOPHOSPHATE"'),
  48.191 -"SDG": ('g', 'd', 'M', None, '2-AMINO-9-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-9H-PURINE-6-SELENOL'),
  48.192 -"MDR": ('n', 'd', None, None, '9-(2-DEOXY-BETA-D-RIBOFURANOSYL)-6-METHYLPURINE'),
  48.193 -"BSE": ('s', 'p', 'M', None, 'BETA-3-SERINE'),
  48.194 -"4IN": ('x', 'p', None, None, '4-AMINO-L-TRYPTOPHAN'),
  48.195 -"FCL": ('f', 'p', 'M', None, '3-CHLORO-L-PHENYLALANINE'),
  48.196 -"1SC": ('c', 'r', 'M', None, '"5\'-O-THIOPHOSPHONOCYTIDINE"'),
  48.197 -"R7A": ('c', 'p', 'M', None, '3-S-[(4-BROMO-2,2,5,5-TETRAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROL-3-YL)METHYL]SULFANYL-L-CYSTEINE'),
  48.198 -"6IA": ('a', 'r', 'M', None, '"N6-ISOPENTENYL-ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.199 -"XTS": ('g', 'r', 'M', None, '9-[(2R,3R,4S,5R)-3,4-DIHYDROXY-5-(HYDROXYMETHYL)OXOLAN-2-YL]-3H-PURINE-2,6-DIONE'),
  48.200 -"DFT": ('n', 'd', None, None, '"1-[2-DEOXYRIBOFURANOSYL]-2,4-DIFLUORO-5-METHYL-BENZENE-5\'MONOPHOSPHATE"'),
  48.201 -"FMU": ('n', 'r', None, None, '"5-FLUORO-5-METHYLURIDINE-5\'-MONOPHOSPHATE"'),
  48.202 -"XTY": ('n', 'd', None, None, '(1R)-1,4-ANHYDRO-2-DEOXY-1-(6-METHYL-2,4-DIOXO-1,2,3,4-TETRAHYDROQUINAZOLIN-8-YL)-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL'),
  48.203 -"MVA": ('v', 'p', 'M', None, 'N-METHYLVALINE'),
  48.204 -"LTA": ('x', 'p', None, None, '"4,8-DIAMINO-3-HYDROXY-OCTANOIC'),
  48.205 -"01W": ('x', 'p', None, None, '(2S)-2-AMMONIO-4-[(2,4-DINITROPHENYL)AMINO]BUTANOATE'),
  48.206 -"DFI": ('x', 'p', None, None, '2,2-DIFLUOROSTATINE'),
  48.207 -"YG": ('g', 'r', 'M', None, 'WYBUTOSINE'),
  48.208 -"FMG": ('g', 'd', 'M', None, '2-AMINO-9-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)-7-METHYL-6-OXO-6,9-DIHYDRO-1H-PURIN-7-IUM'),
  48.209 -"DFG": ('g', 'd', 'M', None, "2'-DEOXY-L-RIBO-FURANOSYL GUANINE-5'-MONOPHOSPHATE"),
  48.210 -"XTL": ('t', 'd', 'M', None, '"[(1S,4R,6R)-6-HYDROXY-4-(THYMIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL'),
  48.211 -"DFC": ('c', 'd', 'M', None, "2'-DEOXY-L-RIBO-FURANOSYL CYTOSINE-5'-MONOPHOSPHATE"),
  48.212 -"XTH": ('t', 'd', 'M', None, '1-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-5-METHYLPYRIMIDINE-2,4(1H,3H)-DIONE'),
  48.213 -"NTY": ('y', 'p', 'M', None, '"BETA'),
  48.214 -"EDC": ('g', 'd', 'M', None, '"N3,N4-ETHENO-2\'-DEOXYCYTIDINE-5\'-MONOPHOSPHATE"'),
  48.215 -"T49": ('t', 'd', 'M', None, "S4'-2'DEOXYTHYMIDINE 5'-MONOPHOSPHATE"),
  48.216 -"EDA": ('a', 'd', 'M', None, '"3-[2-DEOXY-RIBOFURANOSYL]-3H-1,3,4,5A,8-PENTAAZA-AS-INDACENE-5\'-MONOPHOSPHATE"'),
  48.217 -"O12": ('x', 'p', None, None, 'N~5~-DODECANOYL-L-ORNITHINE'),
  48.218 -"MMT": ('t', 'd', 'M', None, '"5\'-O-(DIMETHYLAMINO)-THYMIDINE"'),
  48.219 -"T41": ('n', 'r', None, None, "1-(2-O-METHYL-BETA-D-ARABINOFURANOSYL)THYMIDINE 5'-MONOPHOSPHATE"),
  48.220 -"LMS": ('n', 'r', None, None, '"[(2R,3S,4R,5R)-5-(6-AMINO-9H-PURIN-9-YL)-3,4-DIHYDROXYTETRAHYDRO-2-FURANYL]METHYL'),
  48.221 -"0AP": ('c', 'd', 'M', None, "2'-deoxycytidine 3',5'-bis(dihydrogen phosphate)"),
  48.222 -"ORQ": ('r', 'p', 'M', None, 'N~5~-ACETYL-L-ORNITHINE'),
  48.223 -"ORN": ('a', 'p', 'M', None, 'ORNITHINE'),
  48.224 -"ALM": ('a', 'p', 'M', None, '1-METHYL-ALANINAL'),
  48.225 -"VB1": ('k', 'p', 'M', None, 'N^6^-[(1R)-2-{[(1S)-1-CARBOXYPROPYL]AMINO}-2-OXO-1-(SULFANYLMETHYL)ETHYL]-6-OXO-L-LYSINE'),
  48.226 -"PUY": ('n', 'r', None, None, 'PUROMYCIN'),
  48.227 -"SBD": ('s', 'p', 'M', None, '"D-NAPHTHYL-1-ACETAMIDO'),
  48.228 -"LPS": ('s', 'p', 'M', None, 'O-{HYDROXY[((2R)-2-HYDROXY-3-{[(1S)-1-HYDROXYPENTADECYL]OXY}PROPYL)OXY]PHOSPHORYL}-L-SERINE'),
  48.229 -"SC": ('c', 'd', 'M', None, '"2-DEOXY-CYTIDINE-5\'-THIOPHOSPHORATE"'),
  48.230 -"ALT": ('a', 'p', 'M', None, 'THIOALANINE'),
  48.231 -"GL3": ('g', 'p', 'M', None, 'THIOGLYCIN'),
  48.232 -"LME": ('e', 'p', 'M', None, '"(3R)-3-METHYL-L-GLUTAMIC'),
  48.233 -"SBL": ('s', 'p', 'M', None, '"L-NAPHTHYL-1-ACETAMIDO'),
  48.234 -"ALQ": ('x', 'p', None, None, '"2-METHYL-PROPIONIC'),
  48.235 -"CAB": ('a', 'p', 'M', None, '4-CARBOXY-4-AMINOBUTANAL'),
  48.236 -"GPL": ('k', 'p', 'M', None, "LYSINE GUANOSINE-5'-MONOPHOSPHATE"),
  48.237 -"CAY": ('c', 'p', 'M', None, 'CARBOXYMETHYLENECYSTEINE'),
  48.238 -"DPR": ('p', 'p', 'M', None, 'D-PROLINE'),
  48.239 -"DPQ": ('y', 'p', 'M', None, '"(S)-2-AMINO-3-(4-HYDROXY-3-OXOCYCLOHEXA-1,4-DIENYL)PROPANOIC'),
  48.240 -"DPP": ('a', 'p', 'M', None, '"DIAMMINOPROPANOIC'),
  48.241 -"CAS": ('c', 'p', 'M', None, 'S-(DIMETHYLARSENIC)CYSTEINE'),
  48.242 -"NC1": ('s', 'p', 'M', None, '"NITROCEFIN'),
  48.243 -"IYR": ('y', 'p', 'M', None, '3-IODO-TYROSINE'),
  48.244 -"CAV": ('x', 'p', None, None, '"5-AMINO-6-CYCLOHEXYL-3,4-DIHYDROXY-2-ISOPROPYL-HEXANOIC'),
  48.245 -"DPY": ('n', 'd', None, None, "2-DEOXYRIBOFURANOSYL-PYRIDINE-2,6-DICARBOXYLIC ACID-5'-MONOPHOSPHATE"),
  48.246 -"5AA": ('a', 'd', 'M', None, '"N6-DIMETHYL-3\'-AMINO-ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.247 -"DPB": ('t', 'd', 'M', None, '"(S)-1-[2\'-DEOXY-3\',5\'-O-(1-PHOSPHONO)BENZYLIDENE-B-D-THREO-PENTOFURANOSYL]THYMINE"'),
  48.248 -"CAL": ('x', 'p', None, None, '"5-AMINO-6-CYCLOHEXYL-4-HYDROXY-2-ISOBUTYL-HEXANOIC'),
  48.249 -"DPN": ('f', 'p', 'M', None, 'D-PHENYLALANINE'),
  48.250 -"BAL": ('a', 'p', 'M', None, 'BETA-ALANINE'),
  48.251 -"DPL": ('p', 'p', 'M', None, '4-OXOPROLINE'),
  48.252 -"CAF": ('c', 'p', 'M', None, 'S-DIMETHYLARSINOYL-CYSTEINE'),
  48.253 -"DPH": ('f', 'p', 'M', None, 'DEAMINO-METHYL-PHENYLALANINE'),
  48.254 -"HIP": ('h', 'p', 'M', None, 'ND1-PHOSPHONOHISTIDINE'),
  48.255 -"LG": ('g', 'r', 'M', None, '"L-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.256 -"LA2": ('k', 'p', 'M', None, 'N~6~-[(6R)-6,8-DISULFANYLOCTANOYL]-L-LYSINE'),
  48.257 -"KOR": ('m', 'p', 'M', None, 'L-HOMOCYSTEINE-S-N-S-L-CYSTEINE'),
  48.258 -"LC": ('c', 'r', 'M', None, '"L-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.259 -"D4M": ('t', 'd', 'M', None, '"[(5R)-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)-2,5-DIHYDROFURAN-2-YL]METHYL'),
  48.260 -"5MD": ('n', 'd', None, None, '"5-METHYL-2\'-DEOXYPSEUDOURIDINE"'),
  48.261 -"PGN": ('g', 'd', 'M', None, '"2\'-DEOXYGUANOSINE-3\',5\'-DIPHOSPHATE"'),
  48.262 -"NHL": ('e', 'p', 'M', None, '"(4S)-4-(2-NAPHTHYLMETHYL)-D-GLUTAMIC'),
  48.263 -"PGL": ('x', 'p', None, None, '"AMINOMETHYLENEPHOSPHINIC'),
  48.264 -"TA3": ('t', 'd', 'M', None, '(4S,5R)-3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-METHYL-1,3-DIAZABICYCLO[2.2.0]HEXAN-2-ONE'),
  48.265 -"XCP": ('x', 'p', None, None, '"(1S,2S)-2-AMINOCYCLOPENTANECARBOXYLIC'),
  48.266 -"DYS": ('c', 'p', 'M', None, 'S-[5-(2-AMINOETHYL)-2,3-DIHYDROXYPHENYL]-L-CYSTEINE'),
  48.267 -"HIA": ('h', 'p', 'M', None, '"L-HISTIDINE'),
  48.268 -"HIC": ('h', 'p', 'M', None, '4-METHYL-HISTIDINE'),
  48.269 -"PGP": ('g', 'r', 'M', None, '"GUANOSINE-3\',5\'-DIPHOSPHATE"'),
  48.270 -"AB7": ('x', 'p', None, None, '"ALPHA-AMINOBUTYRIC'),
  48.271 -"IIL": ('i', 'p', 'M', None, 'ISO-ISOLEUCINE'),
  48.272 -"DYG": ('x', 'p', None, None, '"(3S)-3-AMINO-3-[(4Z)-1-(CARBOXYMETHYL)-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-IMIDAZOL-2-YL]PROPANOIC'),
  48.273 -"PGY": ('g', 'p', 'M', None, 'PHENYLGLYCINE'),
  48.274 -"A": ('A', 'r', None, None, '"ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.275 -"23F": ('f', 'p', 'M', None, '"(2Z)-2-AMINO-3-PHENYLACRYLIC'),
  48.276 -"143": ('c', 'p', 'M', None, 'S-2,3-DIHYDRO-5-GLYCIN-2-YL-ISOXAZOL-3-YL-CYSTEINE'),
  48.277 -"HIQ": ('h', 'p', 'M', None, '1-[1,2-DIHYDROXY-1-(HYDROXYMETHYL)ETHYL]-L-HISTIDINE'),
  48.278 -"THR": ('T', 'p', None, None, 'THREONINE'),
  48.279 -"PG7": ('g', 'd', 'M', None, '"((2R,3R,5R)-5-(2-AMINO-6-HYDROXY-9H-PURIN-9-YL)-3-HYDROXY-TETRAHYDROFURAN-2-YL)METHYL'),
  48.280 -"ABR": ('a', 'd', 'M', None, '"(R)-(N-PHENYL-2-HYDROXY-ETHYL)-2\'-DEOXY-ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.281 -"HIS": ('H', 'p', None, None, 'HISTIDINE'),
  48.282 -"ABT": ('n', 'd', None, None, "3'-AZIDO-3'-DEOXY-THYMIDINE-5'-ALPHA BORANO TRIPHOSPHATE"),
  48.283 -"PG1": ('s', 'p', 'M', None, '"PENICILLIN'),
  48.284 -"2LU": ('l', 'p', 'M', None, '"2-AMINO-4-METHYL-PENTANYL'),
  48.285 -"NZH": ('h', 'p', 'M', None, '(2S)-2-AMINO-3-[1-(1H-TETRAAZOL-5-YL)-1H-IMIDAZOL-4-YL]PROPANAL'),
  48.286 -"P5P": ('a', 'r', 'M', None, "PURINE RIBOSIDE-5'-MONOPHOSPHATE"),
  48.287 -"STY": ('y', 'p', 'M', None, '"TYROSINE-O-SULPHONIC'),
  48.288 -"PG9": ('g', 'p', 'M', None, 'D-PHENYLGLYCINE'),
  48.289 -"NCX": ('n', 'd', None, None, '1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-NITRO-1H-INDOLE-3-CARBOXAMIDE'),
  48.290 -"NCY": ('x', 'p', None, None, 'N-METHYLCYSTEINE'),
  48.291 -"CFL": ('c', 'd', 'M', None, '4-AMINO-1-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)PYRIMIDIN-2(1H)-ONE'),
  48.292 -"3MD": ('d', 'p', 'M', None, '"2S,3S-3-METHYLASPARTIC'),
  48.293 -"1AP": ('a', 'd', 'M', None, '"2,6-DIAMINOPURINE'),
  48.294 -"TFE": ('t', 'd', 'M', None, "2'-O-[2-(TRIFLUORO)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"),
  48.295 -"D3T": ('t', 'd', 'M', None, '"2\',3\'-DIDEOXY-THYMIDINE-5\'-TRIPHOSPHATE"'),
  48.296 -"TFA": ('x', 'p', None, None, '"TRIFLUOROACETYL'),
  48.297 -"BHD": ('d', 'p', 'M', None, '"BETA-HYDROXYASPARTIC'),
  48.298 -"ONL": ('x', 'p', None, None, '5-OXO-L-NORLEUCINE'),
  48.299 -"CFY": ('x', 'p', None, None, '"[(2S)-2-{(2R)-2-[(1S)-1-AMINO-2-PHENYLETHYL]-2-HYDROXY-2,5-DIHYDRO-1,3-THIAZOL-4-YL}-4-(4-HYDROXYBENZYL)-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
  48.300 -"QLG": ('x', 'p', None, None, '"[(4Z)-2-[(1Z)-4-AMINO-4-OXOBUTANIMIDOYL]-4-(2-METHYLPROPYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
  48.301 -"NCB": ('a', 'p', 'M', None, 'N-CARBAMOYL-ALANINE'),
  48.302 -"TFQ": ('f', 'p', 'M', None, '4-(2,2,2-TRIFLUOROETHYL)-L-PHENYLALANINE'),
  48.303 -"XXY": ('x', 'p', None, None, '2-[(1R,2R)-1-AMINO-2-HYDROXYPROPYL]-1-(CARBOXYMETHYL)-4-(1H-IMIDAZOL-5-YLMETHYL)-1H-IMIDAZOL-5-OLATE'),
  48.304 -"23S": ('x', 'p', None, None, '"(S)-2-AMINO-3-(6H-SELENOLO[2,3-B]-PYRROL-4-YL)-PROPIONIC'),
  48.305 -"0AU": ('n', 'd', None, None, "5-iodouridine 5'-(dihydrogen phosphate)"),
  48.306 -"GHP": ('g', 'p', 'M', None, '4-HYDROXYPHENYLGLYCINE'),
  48.307 -"MIA": ('a', 'r', 'M', None, '"2-METHYLTHIO-N6-ISOPENTENYL-ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.308 -"AP7": ('a', 'd', 'M', None, "N1-PROTONATED ADENOSINE-5'-MONOPHOSPHATE"),
  48.309 -"OAD": ('n', 'r', None, None, "2'-O-ACETYL ADENOSINE-5-DIPHOSPHORIBOSE"),
  48.310 -"UDP": ('n', 'r', None, None, '"URIDINE-5\'-DIPHOSPHATE"'),
  48.311 -"GHG": ('q', 'p', 'M', None, 'GAMMA-HYDROXY-GLUTAMINE'),
  48.312 -"ZFB": ('x', 'p', None, None, '(3R)-3-{[(BENZYLOXY)CARBONYL]AMINO}-2-OXO-4-PHENYLBUTANE-1-DIAZONIUM'),
  48.313 -"C38": ('c', 'd', 'M', None, '"5-IODO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.314 -"HAQ": ('x', 'p', None, None, '"5-AMINO-4-OXO-1,2,4,5,6,7-HEXAHYDRO-AZEPINO[3,2,1-HI]INDOLE-2-CARBOXYLIC'),
  48.315 -"C34": ('c', 'd', 'M', None, '"N4-METHYL-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.316 -"C37": ('c', 'd', 'M', None, '"5-FLUORO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.317 -"ONE": ('u', 'r', 'M', None, '"1-(BETA-D-RIBOFURANOSYL)-PYRIDIN-4-ONE-5\'-PHOSPHATE"'),
  48.318 -"C31": ('c', 'r', 'M', None, "2'-O-3-AMINOPROPYL CYTIDINE-5'-MONOPHOSPHATE"),
  48.319 -"HAR": ('r', 'p', 'M', None, 'N-OMEGA-HYDROXY-L-ARGININE'),
  48.320 -"TY2": ('y', 'p', 'M', None, '3-AMINO-L-TYROSINE'),
  48.321 -"TY3": ('y', 'p', 'M', None, '3-HYDROXY-L-TYROSINE'),
  48.322 -"RT": ('n', 'r', None, None, '"RIBOSYLTHYMINE-5\'-MONOPHOSPHATE"'),
  48.323 -"4PE": ('c', 'd', 'M', None, '3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-3H-PYRROLO[2,3-D]PYRIMIDIN-2-OL'),
  48.324 -"4PD": ('c', 'd', 'M', None, '3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-1,3-DIHYDRO-2H-PYRROLO[2,3-D]PYRIMIDIN-2-ONE'),
  48.325 -"4PC": ('c', 'd', 'M', None, '"3-(2\'-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-3,7-DIHYDRO-2H-PYRROLO[2,3-D]PYRIMIDIN-2-ONE"'),
  48.326 -"SAC": ('s', 'p', 'M', None, 'N-ACETYL-SERINE'),
  48.327 -"PRO": ('P', 'p', None, None, 'PROLINE'),
  48.328 -"PRN": ('a', 'd', 'M', None, "PURINE 2'-DEOXYRIBO-5'-MONOPHOSPHATE"),
  48.329 -"6CL": ('k', 'p', 'M', None, '6-CARBOXYLYSINE'),
  48.330 -"4PH": ('f', 'p', 'M', None, '4-METHYL-L-PHENYLALANINE'),
  48.331 -"FGL": ('g', 'p', 'M', None, '"2-AMINOPROPANEDIOIC'),
  48.332 -"6CT": ('t', 'd', 'M', None, '"PHOSPHORIC'),
  48.333 -"IAS": ('d', 'p', 'M', None, '"BETA-ASPARTYL'),
  48.334 -"PRS": ('p', 'p', 'M', None, 'THIOPROLINE'),
  48.335 -"UR3": ('u', 'r', 'M', None, '"3-METHYLURIDINE-5\'-MONOPHOSHATE"'),
  48.336 -"SAR": ('g', 'p', 'M', None, 'SARCOSINE'),
  48.337 -"TCY": ('a', 'd', 'M', None, '"(2R,3AS,4AR,5AR,5BS)-2-(6-AMINO-9H-PURIN-9-YL)-3A-HYDROXYHEXAHYDROCYCLOPROPA[4,5]CYCLOPENTA[1,2-B]FURAN-5A(4H)-YL'),
  48.338 -"4F3": ('x', 'p', None, None, '"[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(4-FLUORO-1H-INDOL-3-YLMETHYL)-5-HYDROXY-IMIDAZOL-1-YL]-ACETIC'),
  48.339 -"TPG": ('g', 'r', 'M', None, '"2,2,7-TRIMETHYL-GUANOSINE-5\'-TRIPHOSPHATE-5\'-GUANOSINE"'),
  48.340 -"LAL": ('a', 'p', 'M', None, 'N,N-DIMETHYL-L-ALANINE'),
  48.341 -"TPC": ('c', 'd', 'M', None, "5'-THIO-2'-DEOXY-CYTOSINE PHOSPHONIC ACID"),
  48.342 -"PPU": ('a', 'r', 'M', None, '"PUROMYCIN-5\'-MONOPHOSPHATE"'),
  48.343 -"CHF": ('x', 'p', None, None, 'CYCLOHEXYLFLUOROSTATONE'),
  48.344 -"BFD": ('d', 'p', 'M', None, '"ASPARTATE'),
  48.345 -"TPO": ('t', 'p', 'M', None, 'PHOSPHOTHREONINE'),
  48.346 -"H5M": ('p', 'p', 'M', None, 'TRANS-3-HYDROXY-5-METHYLPROLINE'),
  48.347 -"AYG": ('x', 'p', None, None, '"[(4E)-2-[(1S)-1-AMINOETHYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
  48.348 -"TPL": ('w', 'p', 'M', None, 'TRYPTOPHANOL'),
  48.349 -"AYA": ('a', 'p', 'M', None, 'N-ACETYLALANINE'),
  48.350 -"6MC": ('a', 'r', 'M', None, '"CIS-N6-METHYL-DEOXY-ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.351 -"6MA": ('a', 'r', 'M', None, '"N6-METHYL-DEOXY-ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.352 -"GSR": ('g', 'd', 'M', None, "2'-DEOXY-N2-(R)STYRENE OXIDE GUANOSINE MONOPHOSPHATE"),
  48.353 -"EYS": ('x', 'p', None, None, 'S-SELANYLCYSTEINAL'),
  48.354 -"LPL": ('x', 'p', None, None, 'LEU-HYDROXYETHYLENE-LEU'),
  48.355 -"6MZ": ('n', 'r', None, None, '"N6-METHYLADENOSINE-5\'-MONOPHOSPHATE"'),
  48.356 -"PR3": ('c', 'p', 'M', None, 'S,S-PROPYLTHIOCYSTEINE'),
  48.357 -"2ST": ('t', 'd', 'M', None, "5-METHYL-2'-SE-METHYL-2'-SELENOURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
  48.358 -"0NC": ('a', 'p', 'M', None, 'N-METHYL-L-ALANINAMIDE'),
  48.359 -"MRG": ('g', 'd', 'M', None, '"N2-(3-MERCAPTOPROPYL)-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'),
  48.360 -"KYN": ('a', 'p', 'M', None, 'KYNURENINE'),
  48.361 -"G2S": ('g', 'd', 'M', None, '"GUANOSINE-5\'-DITHIOPHOSPHORATE"'),
  48.362 -"PYX": ('c', 'p', 'M', None, 'S-[S-THIOPYRIDOXAMINYL]CYSTEINE'),
  48.363 -"PYY": ('n', 'r', None, None, '"D-RIBOFURANOSYL-BENZENE-5\'-MONOPHOSPHATE"'),
  48.364 -"TYN": ('y', 'p', 'M', None, 'AMINOBENZOFURAZAN-O-TYROSINE'),
  48.365 -"TYO": ('y', 'p', 'M', None, '"(4Z,6E)-2-AMINO-7-HYDROPEROXY-4-[(E)-2-HYDROXYVINYL]HEPTA-4,6-DIENOIC'),
  48.366 -"KAG": ('g', 'r', 'M', None, "2'-DEOXY-N-[(1S)-1-METHYL-3-OXOPROPYL]GUANOSINE 5'-PHOSPHATE"),
  48.367 -"IPG": ('g', 'p', 'M', None, '"N-ISOPROPYL'),
  48.368 -"GH3": ('g', 'r', 'M', None, '"3\'-DEOXY-GUANOSINE-5\'-TRIPHOSPHATE"'),
  48.369 -"APP": ('x', 'p', None, None, '1-ACETYL-2-CARBOXYPIPERIDINE'),
  48.370 -"IPN": ('n', 'd', None, None, '2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-5-IODOURACIL'),
  48.371 -"TYB": ('y', 'p', 'M', None, 'TYROSINAL'),
  48.372 -"FAG": ('n', 'd', None, None, "[1',2'-DIDEOXY[2-AMINO-5-([9-HYDROXY-AFLATOXINB2-8-YL]-FORMYL-AMINO)-6-OXO-1,6-IHYDRO-PYRIMIDIN-4-YLAMINO]-RIBOFURANOSE]-5-MONOPHOSPHATE GROUP"),
  48.373 -"DBY": ('y', 'p', 'M', None, '"3,5'),
  48.374 -"APK": ('k', 'p', 'M', None, '"5\'-O-[(S)-{[(5S)-5-AMINO-6-OXOHEXYL]AMINO}(HYDROXY)PHOSPHORYL]ADENOSINE"'),
  48.375 -"APH": ('a', 'p', 'M', None, 'P-AMIDINOPHENYL-3-ALANINE'),
  48.376 -"API": ('k', 'p', 'M', None, '"2,6-DIAMINOPIMELIC'),
  48.377 -"TYX": ('x', 'p', None, None, 'S-(2-ANILINO-2-OXOETHYL)-L-CYSTEINE'),
  48.378 -"APO": ('x', 'p', None, None, '"D-2-AMINO-3-PHOSPHONO-PROPIONIC'),
  48.379 -"APM": ('x', 'p', None, None, 'M-AMIDINOPHENYL-3-ALANINE'),
  48.380 -"TYT": ('y', 'p', 'M', None, '"TYROSINE'),
  48.381 -"TYU": ('n', 'r', None, None, 'TETRAHYDROURIDINE'),
  48.382 -"DBS": ('s', 'p', 'M', None, '"2-(2,3-DIHYDROXY-BENZOYLAMINO)-3-HYDROXY-PROPIONIC'),
  48.383 -"DBU": ('a', 'p', 'M', None, '"(2E)-2-AMINOBUT-2-ENOIC'),
  48.384 -"TYQ": ('y', 'p', 'M', None, '3-AMINO-6-HYDROXY-TYROSINE'),
  48.385 -"TYR": ('Y', 'p', None, None, 'TYROSINE'),
  48.386 -"APE": ('x', 'p', None, None, '"(1-AMINO-2-PHENYL-ETHYL)-CARBAMIC'),
  48.387 -"BT5": ('n', 'r', None, None, 'BIOTINYL-5-AMP'),
  48.388 -"KST": ('k', 'p', 'M', None, 'N~6~-(5-CARBOXY-3-THIENYL)-L-LYSINE'),
  48.389 -"2AD": ('x', 'p', None, None, '"2\'-AMINO-2\'-DEOXYADENOSINE"'),
  48.390 -"ARG": ('R', 'p', None, None, 'ARGININE'),
  48.391 -"HDP": ('n', 'd', None, None, '"[(1S,6S)-6-HYDROXY-4-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)CYCLOHEX-2-EN-1-YL]METHYL'),
  48.392 -"CZ2": ('c', 'p', 'M', None, 'S-(DIHYDROXYARSINO)CYSTEINE'),
  48.393 -"2AO": ('x', 'p', None, None, '(2S)-2-AMINOHEXAN-1-OL'),
  48.394 -"G33": ('g', 'd', 'M', None, "8-METHYL-2'-DEOXYGUANOSINE 3'-MONOPHOSPHATE"),
  48.395 -"AHO": ('a', 'p', 'M', None, 'N-ACETYL-N-HYDROXY-L-ORNITHINE'),
  48.396 -"P2U": ('n', 'd', None, None, '"2\'-DEOXY-PSEUDOURIDINE-5\'MONOPHOSPHATE"'),
  48.397 -"P2T": ('t', 'd', 'M', None, "2'-O-PROPYL THYMIDINE-5-MONOPHOSPHATE"),
  48.398 -"MG1": ('g', 'd', 'M', None, "2'-DEOXY-1-METHYLGUANOSINE 5'-(DIHYDROGEN PHOSPHATE)"),
  48.399 -"G32": ('g', 'd', 'M', None, '"O6-METHYL-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.400 -"2AR": ('a', 'd', 'M', None, '"2\'-DEOXYARISTEROMYCIN-5\'-PHOSPHATE"'),
  48.401 -"2AS": ('x', 'p', None, None, '"(2S,3S)-3-AMINO-2-METHYL-4-OXOBUTANOIC'),
  48.402 -"G46": ('g', 'r', 'M', None, '"2\'-DEOXY-GUANOSINE-5\'-MONOTHIOPHOSPHATE"'),
  48.403 -"G47": ('g', 'd', 'M', None, '"N2-ETHANETHIOL-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.404 -"G48": ('g', 'r', 'M', None, '"2\'-O-METHYOXYETHYL-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.405 -"G49": ('g', 'd', 'M', None, '"N2-METHYL-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.406 -"OCS": ('c', 'p', 'M', None, '"CYSTEINESULFONIC'),
  48.407 -"P2Y": ('p', 'p', 'M', None, '(2S)-PYRROLIDIN-2-YLMETHYLAMINE'),
  48.408 -"PHA": ('f', 'p', 'M', None, 'PHENYLALANINAL'),
  48.409 -"NLQ": ('q', 'p', 'M', None, 'N~2~-ACETYL-L-GLUTAMINE'),
  48.410 -"NLP": ('l', 'p', 'M', None, '"(1-AMINO-PENTYL)-PHOSPHONIC'),
  48.411 -"MLL": ('l', 'p', 'M', None, '"METHYL'),
  48.412 -"CEA": ('c', 'p', 'M', None, 'S-HYDROXY-CYSTEINE'),
  48.413 -"HV5": ('a', 'p', 'M', None, 'TERT-BUTYLALANINE'),
  48.414 -"HMR": ('r', 'p', 'M', None, 'BETA-HOMOARGININE'),
  48.415 -"HRG": ('r', 'p', 'M', None, 'L-HOMOARGININE'),
  48.416 -"CSP": ('c', 'p', 'M', None, 'S-PHOSPHOCYSTEINE'),
  48.417 -"FA2": ('a', 'd', 'M', None, '"5-(6-AMINO-9H-PURIN-9-YL)-4-HYDROXYTETRAHYDROFURAN-3-YL'),
  48.418 -"BMP": ('n', 'r', None, None, '"6-HYDROXYURIDINE-5\'-PHOSPHATE"'),
  48.419 -"NLE": ('l', 'p', 'M', None, 'NORLEUCINE'),
  48.420 -"RTP": ('n', 'r', None, None, '"RIBAVIRIN'),
  48.421 -"BMT": ('t', 'p', 'M', None, '4-METHYL-4-[(E)-2-BUTENYL]-4,N-METHYL-THREONINE'),
  48.422 -"G38": ('g', 'd', 'M', None, '"3\'-AMINO-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.423 -"CS4": ('c', 'p', 'M', None, 'S-[3-(3,4-DICHLOROPHENYL)-3-OXOPROPYL]-L-CYSTEINE'),
  48.424 -"2FI": ('n', 'd', None, None, '"2\'-FLUORO-2\'-DEOXYINOSINE"'),
  48.425 -"TTS": ('y', 'p', 'M', None, '3-((3E)-4-HYDROXY-3-{[2-(4-HYDROXYPHENYL)ETHYL]IMINO}-6-OXOCYCLOHEXA-1,4-DIEN-1-YL)ALANINE'),
  48.426 -"CS1": ('c', 'p', 'M', None, 'S-(2-ANILINYL-SULFANYL)-CYSTEINE'),
  48.427 -"CS0": ('c', 'p', 'M', None, 'S-(2-HYDROXYETHYL)-L-CYSTEINE'),
  48.428 -"NLO": ('l', 'p', 'M', None, 'O-METHYL-L-NORLEUCINE'),
  48.429 -"NLN": ('l', 'p', 'M', None, '"NORLEUCINE'),
  48.430 -"SHP": ('g', 'p', 'M', None, '(4-HYDROXYMALTOSEPHENYL)GLYCINE'),
  48.431 -"CSL": ('c', 'd', 'M', None, '"(D)-2\'-METHYLSELENYL-2\'-DEOXYCYTIDINE-5\'-PHOSPHATE"'),
  48.432 -"SHR": ('k', 'p', 'M', None, '"N-(5-AMINO-5-CARBOXYPENTYL)GLUTAMIC'),
  48.433 -"OXX": ('d', 'p', 'M', None, '"OXALYL-ASPARTYL'),
  48.434 -"B3Y": ('y', 'p', 'M', None, '"(3S)-3-AMINO-4-(4-HYDROXYPHENYL)BUTANOIC'),
  48.435 -"DLE": ('l', 'p', 'M', None, 'D-LEUCINE'),
  48.436 -"PYA": ('a', 'p', 'M', None, '3-(1,10-PHENANTHROL-2-YL)-L-ALANINE'),
  48.437 -"CSE": ('c', 'p', 'M', None, 'SELENOCYSTEINE'),
  48.438 -"5FA": ('a', 'r', 'M', None, '"ADENOSINE-5\'-PENTAPHOSPHATE"'),
  48.439 -"GCK": ('c', 'd', 'M', None, "PHOSPHORIC ACID 9-(2-GUANIDINOETHOXY-3-(2-DEOXY-BETA-D-ERYTHROPENTOFURANOSYL))-3H-PYRIMIDO-[5,4-B][1,4]-BENZOOXAZIN-2-ONE]-5'-ESTER"),
  48.440 -"5FC": ('c', 'd', 'M', None, '"5-FORMYL-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.441 -"GGL": ('e', 'p', 'M', None, '"GAMMA-GLUTAMIC'),
  48.442 -"B3Q": ('x', 'p', None, None, '"(3S)-3,6-DIAMINO-6-OXOHEXANOIC'),
  48.443 -"B3S": ('s', 'p', 'M', None, '"(3R)-3-AMINO-4-HYDROXYBUTANOIC'),
  48.444 -"B3L": ('x', 'p', None, None, '"(3S)-3-AMINO-5-METHYLHEXANOIC'),
  48.445 -"A47": ('a', 'd', 'M', None, "N6-METHOXY ADENOSINE 5'-MONOPHOSPHATE"),
  48.446 -"A44": ('a', 'r', 'M', None, "2'-O-METHYOXYETHYL-ADENOSINE 5'-MONOPHOSPHATE"),
  48.447 -"5MU": ('u', 'r', 'M', None, "5-METHYLURIDINE 5'-MONOPHOSPHATE"),
  48.448 -"CSY": ('x', 'p', None, None, '"[4-(4-HYDROXY-BENZYL)-2-(2-HYDROXY-1-METHYL-ETHYL)-5-OXO-IMIDAZOLIDIN-1-YL]-ACETIC'),
  48.449 -"A43": ('a', 'd', 'M', None, "3'-AMINO DEOXYADENOSINE 5'-MONOPHOSPHATE"),
  48.450 -"A40": ('a', 'd', 'M', None, "N2-METHYL 2'-DEOXYADENOSINE 5'-MONOPHOSPHATE"),
  48.451 -"B3K": ('k', 'p', 'M', None, '"(3S)-3,7-DIAMINOHEPTANOIC'),
  48.452 -"B3D": ('d', 'p', 'M', None, '"3-AMINOPENTANEDIOIC'),
  48.453 -"B3E": ('e', 'p', 'M', None, '"(3S)-3-AMINOHEXANEDIOIC'),
  48.454 -"DLY": ('k', 'p', 'M', None, 'D-LYSINE'),
  48.455 -"MTU": ('a', 'r', 'M', None, '9-BETA-D-RIBOFURANOSYL-9H-PURIN-2-AMINE'),
  48.456 -"B3A": ('a', 'p', 'M', None, '"(3S)-3-AMINOBUTANOIC'),
  48.457 -"CSS": ('c', 'p', 'M', None, 'S-MERCAPTOCYSTEINE'),
  48.458 -"CSR": ('c', 'p', 'M', None, 'S-ARSONOCYSTEINE'),
  48.459 -"CZZ": ('c', 'p', 'M', None, 'THIARSAHYDROXY-CYSTEINE'),
  48.460 -"N10": ('s', 'p', 'M', None, 'O-[(HEXYLAMINO)CARBONYL]-L-SERINE'),
  48.461 -"MGG": ('r', 'p', 'M', None, '"2-(2-CARBOXY-ACETYLAMINO)-5-GUANIDINO-PENTANOIC'),
  48.462 -"A35": ('a', 'd', 'M', None, "2-AMINO DEOXYADENOSINE 5'-MONOPHOSPHATE"),
  48.463 -"AFG": ('g', 'd', 'M', None, '"N-(5\'-PHOSPHO-2\'-DEOXYGUANOSIN-8-YL)-2-AMINOFLUORENE"'),
  48.464 -"BTR": ('w', 'p', 'M', None, '6-BROMO-TRYPTOPHAN'),
  48.465 -"SSU": ('u', 'r', 'M', None, '"URIDINE-5\'-PHOSPHOROTHIOATE"'),
  48.466 -"70U": ('u', 'r', 'M', None, '"5-(O-METHYLACETO)-2-THIO-2-DEOXY-URIDINE-5\'-MONOPHOSPHATE"'),
  48.467 -"A34": ('a', 'd', 'M', None, "N6-METHYL DEOXYADENOSINE 5'-MONOPHOSPHATE"),
  48.468 -"MGN": ('q', 'p', 'M', None, '2-METHYL-GLUTAMINE'),
  48.469 -"XCL": ('c', 'd', 'M', None, '"[(1S,4R,6R)-6-HYDROXY-4-(CYTOSIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL'),
  48.470 -"G4P": ('n', 'r', None, None, '"GUANOSINE-5\',3\'-TETRAPHOSPHATE"'),
  48.471 -"XCS": ('n', 'd', None, None, '(1R)-1-(4-AMINO-6-METHYL-2-OXO-1,2-DIHYDROQUINAZOLIN-8-YL)-1,4-ANHYDRO-2-DEOXY-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL'),
  48.472 -"MGQ": ('a', 'r', 'M', None, '"7-BENZYL'),
  48.473 -"ABS": ('a', 'd', 'M', None, '"(S)-(N-PHENYL-2-HYDROXY-ETHYL)-2\'-DEOXY-ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.474 -"BTA": ('l', 'p', 'M', None, '4-DEMETHYL-LEUCINE'),
  48.475 -"MGV": ('g', 'r', 'M', None, '"P-FLUORO-7-BENZYL'),
  48.476 -"XCT": ('c', 'd', 'M', None, '4-AMINO-1-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)PYRIMIDIN-2(1H)-ONE'),
  48.477 -"TTQ": ('w', 'p', 'M', None, '6-AMINO-7-HYDROXY-L-TRYPTOPHAN'),
  48.478 -"MGY": ('g', 'p', 'M', None, 'N-METHYLGLYCINE'),
  48.479 -"XCY": ('c', 'd', 'M', None, '"{5-[4-{[4-(AMINOMETHYL)BENZYL]AMINO}-2-OXOPYRIMIDIN-1(2H)-'),
  48.480 -"NNH": ('r', 'p', 'M', None, 'NOR-N-OMEGA-HYDROXY-L-ARGININE'),
  48.481 -"TBG": ('g', 'p', 'M', None, '"T-BUTYL'),
  48.482 -"ATD": ('t', 'd', 'M', None, '"THYMIDINE-3\'-PHOSPHATE"'),
  48.483 -"U8U": ('u', 'r', 'M', None, '"5-METHYLAMINOMETHYL-2-THIOURIDINE-5\'-MONOPHOSPHATE"'),
  48.484 -"SNC": ('c', 'p', 'M', None, 'S-NITROSO-CYSTEINE'),
  48.485 -"ALG": ('r', 'p', 'M', None, '"GUANIDINOBUTYRYL'),
  48.486 -"M1G": ('g', 'd', 'M', None, '"3-(2-DEOXY-BETA-D-RIBOFURANOSYL)-PYRIDO[5,6-A]-PURINE-10-ONE-5\'-MONOPHOSPHATE"'),
  48.487 -"ATL": ('t', 'd', 'M', None, '"[(1S,3R,4S,7R)-7-HYDROXY-3-(THYMIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'),
  48.488 -"ATM": ('t', 'd', 'M', None, '"3\'-AZIDO-3\'-DEOXYTHYMIDINE-5\'-MONOPHOSPHATE"'),
  48.489 -"HPC": ('f', 'p', 'M', None, '3-AMINO-4-PHENYL-BUTAN-2-ONE'),
  48.490 -"TP1": ('t', 'd', 'M', None, '2-(METHYLAMINO)-ETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE'),
  48.491 -"XUG": ('g', 'd', 'M', None, "2'-SE-METHYL-2'-SELENOGUANOSINE 5'-(DIHYDROGEN PHOSPHATE)"),
  48.492 -"ISO": ('x', 'p', None, None, 'PARA-ISOPROPYLANILINE'),
  48.493 -"3MM": ('x', 'p', None, None, '(1R)-1-CARBOXY-N,N,N-TRIMETHYL-3-(METHYLSULFANYL)PROPAN-1-AMINIUM'),
  48.494 -"175": ('x', 'p', None, None, '3,5-DIHYDRO-5-METHYLIDENE-4H-IMIDAZOL-4-ON'),
  48.495 -"2TY": ('y', 'p', 'M', None, '2-HYDROXY-5-{[(1E)-2-PHENYLETHYLIDENE]AMINO}-L-TYROSINE'),
  48.496 -"CSB": ('c', 'p', 'M', None, '"CYS'),
  48.497 -"IT1": ('k', 'p', 'M', None, '(E)-N~6~-({3-HYDROXY-2-METHYL-5-[(PHOSPHONOOXY)METHYL]PYRIDIN-4-YL}METHYLIDENE)-L-LYSINE'),
  48.498 -"UN1": ('x', 'p', None, None, '"2-AMINOHEXANEDIOIC'),
  48.499 -"UN2": ('x', 'p', None, None, '"2-AMINO-4,4-DIHYDROXYBUTANOIC'),
  48.500 -"ASP": ('D', 'p', None, None, '"ASPARTIC'),
  48.501 -"DTH": ('t', 'p', 'M', None, 'D-THREONINE'),
  48.502 -"SER": ('S', 'p', None, None, 'SERINE'),
  48.503 -"ASU": ('n', 'd', None, None, '"4\'-THIO-2\'4\'-DIDEOXYRIBOFURANOSE-5\'-PHOSPHATE"'),
  48.504 -"SEP": ('s', 'p', 'M', None, 'PHOSPHOSERINE'),
  48.505 -"LNT": ('x', 'p', None, None, 'N-[(2S)-2-AMINO-1,1-DIHYDROXY-4-METHYLPENTYL]-L-THREONINE'),
  48.506 -"A39": ('a', 'r', 'M', None, "2'-O-METHYL-ADENOSINE 5'-MONOPHOSPHATE"),
  48.507 -"DGI": ('g', 'd', 'M', None, '"2\'-DEOXYGUANOSINE-5\'-DIPHOSPHATE"'),
  48.508 -"DGN": ('q', 'p', 'M', None, 'D-GLUTAMINE'),
  48.509 -"DGL": ('e', 'p', 'M', None, '"D-GLUTAMIC'),
  48.510 -"SEG": ('a', 'p', 'M', None, 'HYDROXYALANINE'),
  48.511 -"ASB": ('d', 'p', 'M', None, '"ASPARTIC'),
  48.512 -"ASA": ('d', 'p', 'M', None, '"ASPARTIC'),
  48.513 -"SEC": ('a', 'p', 'M', None, '"2-AMINO-3-SELENINO-PROPIONIC'),
  48.514 -"SEB": ('s', 'p', 'M', None, 'O-BENZYLSULFONYL-SERINE'),
  48.515 -"0A9": ('f', 'p', 'M', None, '"METHYL'),
  48.516 -"ASK": ('d', 'p', 'M', None, '"DEHYDROXYMETHYLASPARTIC'),
  48.517 -"PVH": ('h', 'p', 'M', None, 'HISTIDINE-METHYL-ESTER'),
  48.518 -"ASI": ('d', 'p', 'M', None, 'L-ISO-ASPARTATE'),
  48.519 -"SEL": ('s', 'p', 'M', None, '2-AMINO-1,3-PROPANEDIOL'),
  48.520 -"ASN": ('N', 'p', None, None, 'ASPARAGINE'),
  48.521 -"ASM": ('x', 'p', None, None, '"2-AMINO-4-OXO-4(1H-PYRROL-1-YL)BUTANOIC'),
  48.522 -"ASL": ('d', 'p', 'M', None, '"ASPARTIC'),
  48.523 -"AS2": ('d', 'p', 'M', None, '"(2R)-2-AMINO-4-OXOBUTANOIC'),
  48.524 -"IMC": ('c', 'd', 'M', None, '"N1-[2-DEOXY-RIBOFURANOSYL]-[2-AMINO-5-METHYL-4-OXO-4H-PYRIMIDINE]-5\'-MONOPHOSPHATE"'),
  48.525 -"A3P": ('a', 'r', 'M', None, '"ADENOSINE-3\'-5\'-DIPHOSPHATE"'),
  48.526 -"CLH": ('k', 'p', 'M', None, '"2-AMINO-6-[2-(2-OXO-ACETYLAMINO)-ACETYLAMINO]-HEXANOIC'),
  48.527 -"3DR": ('n', 'd', None, None, '"1\',2\'-DIDEOXYRIBOFURANOSE-5\'-PHOSPHATE"'),
  48.528 -"FRD": ('x', 'p', None, None, '1-PHENYL-2-AMINOPROPANE'),
  48.529 -"CLD": ('a', 'p', 'M', None, '"D-PARA-CHLOROPHENYL-1-ACTEAMIDOBORONIC'),
  48.530 -"CLE": ('l', 'p', 'M', None, '"LEUCINE'),
  48.531 -"PDU": ('n', 'd', None, None, '"5(1-PROPYNYL)-2\'-DEOXYURIDINE-5-MONOPHOSPHATE"'),
  48.532 -"CLG": ('k', 'p', 'M', None, '"2-AMINO-6-[2-(2-AMINOOXY-ACETYLAMINO)-ACETYLAMINO]-HEXANOIC'),
  48.533 -"BBC": ('c', 'p', 'M', None, '3-[(4-AMINOBUTYL)SULFINYL]-2-IMINOPROPAN-1-OL'),
  48.534 -"TFO": ('a', 'd', 'M', None, '"[2-(6-AMINO-9H-PURIN-9-YL)-1-METHYLETHOXY]METHYLPHOSPHONIC'),
  48.535 -"CLB": ('a', 'p', 'M', None, '"D-PARA-CHLOROPHENYL-1-ACETAMIDOBORONIC'),
  48.536 -"TLC": ('t', 'd', 'M', None, '"2-O,3-ETHDIYL-ARABINOFURANOSYL-THYMINE-5\'-MONOPHOSPHATE"'),
  48.537 -"2DM": ('n', 'd', None, None, '"2-HYDROXY-3-(PYREN-1-YLMETHOXY)PROPYL'),
  48.538 -"A3A": ('a', 'd', 'M', None, '"2\'DEOXY-ALPHA-ANOMERIC-ADENOSINE-5\'-PHOSPHATE"'),
  48.539 -"PDL": ('x', 'p', None, None, '"N-(5\'-PHOSPHOPYRIDOXYL)-L-ALANINE"'),
  48.540 -"3DA": ('a', 'd', 'M', None, '"3\'-DEOXYADENOSINE-5\'-MONOPHOSPHATE"'),
  48.541 -"GT9": ('c', 'p', 'M', None, 'S-NONYL-CYSTEINE'),
  48.542 -"CLV": ('x', 'p', None, None, '"{(2S)-2-[(1S)-1-AMINOETHYL]-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'),
  48.543 -"PDD": ('x', 'p', None, None, '"N-(5\'-PHOSPHOPYRIDOXYL)-D-ALANINE"'),
  48.544 -"1TQ": ('w', 'p', 'M', None, '6-(FORMYLAMINO)-7-HYDROXY-L-TRYPTOPHAN'),
  48.545 -"SEM": ('x', 'p', None, None, '3-AMINO-4-OXYBENZYL-2-BUTANONE'),
  48.546 -"TGP": ('g', 'd', 'M', None, "5'-THIO-2'-DEOXY-GUANOSINE PHOSPHONIC ACID"),
  48.547 -"OMC": ('c', 'r', 'M', None, '"O2\'-METHYLYCYTIDINE-5\'-MONOPHOSPHATE"'),
  48.548 -"AEA": ('x', 'p', None, None, '"(2-AMINO-2-CARBAMOYL-ETHYLSULFANYL)-ACETIC'),
  48.549 -"OMG": ('g', 'r', 'M', None, '"O2\'-METHYLGUANOSINE-5\'-MONOPHOSPHATE"'),
  48.550 -"H2U": ('u', 'r', 'M', None, '"5,6-DIHYDROURIDINE-5\'-MONOPHOSPHATE"'),
  48.551 -"A38": ('a', 'd', 'M', None, "8-OXY DEOXYADENOSINE-5'-MONOPHOSPHATE"),
  48.552 -"DTY": ('y', 'p', 'M', None, 'D-TYROSINE'),
  48.553 -"PVL": ('x', 'p', None, None, '"PYRUVOYL'),
  48.554 -"ABA": ('a', 'p', 'M', None, '"ALPHA-AMINOBUTYRIC'),
  48.555 -"OMU": ('u', 'r', 'M', None, "O2'-METHYLURIDINE 5'-MONOPHOSPHATE"),
  48.556 -"OMT": ('m', 'p', 'M', None, 'S-DIOXYMETHIONINE'),
  48.557 -"CRF": ('x', 'p', None, None, '[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(1H-INDOL-3-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETALDEHYDE'),
  48.558 -"FPA": ('f', 'p', 'M', None, '"1,1'),
  48.559 -"VMS": ('x', 'p', None, None, '"5\'O-[N-(L-VALYL)SULPHAMOYL]ADENOSINE"'),
  48.560 -"T11": ('f', 'p', 'M', None, '4-[3-(TRIFLUOROMETHYL)DIAZIRIDIN-3-YL]-L-PHENYLALANINE'),
  48.561 -"2MR": ('r', 'p', 'M', None, '"N3,'),
  48.562 -"DNE": ('l', 'p', 'M', None, 'D-NORLEUCINE'),
  48.563 -"5IC": ('c', 'r', 'M', None, '"5-IODO-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.564 -"RC7": ('x', 'p', None, None, '{(2R)-4-(4-HYDROXYBENZYL)-2-[2-(1H-IMIDAZOL-4-YL)ETHYL]-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETALDEHYDE'),
  48.565 -"BPE": ('c', 'p', 'M', None, '(2S)-2-AMINO-3-[(3-AMINOPROPYL)SULFANYL]PROPAN-1-OL'),
  48.566 -"2MT": ('p', 'p', 'M', None, '"(4R)-2,2-DIMETHYL-1,3-THIAZOLIDINE-4-CARBOXYLIC'),
  48.567 -"2MU": ('u', 'r', 'M', None, '"2\',5-DIMETHYLURIDINE-5\'-MONOPHOSPHATE"'),
  48.568 -"DG": ('G', 'd', None, None, '"2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'),
  48.569 -"UCL": ('n', 'd', None, None, "5-CHLORO-2'-DEOXYURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
  48.570 -"DA": ('A', 'd', None, None, '"2\'-DEOXYADENOSINE-5\'-MONOPHOSPHATE"'),
  48.571 -"AEI": ('d', 'p', 'M', None, '"THREONINE-ASPARTIC'),
  48.572 -"N5M": ('c', 'r', 'M', None, "5-nitrocytidine 5'-(dihydrogen phosphate)"),
  48.573 -"5IU": ('n', 'd', None, None, '"5-IODO-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'),
  48.574 -"2MG": ('g', 'r', 'M', None, '"2N-METHYLGUANOSINE-5\'-MONOPHOSPHATE"'),
  48.575 -"AAR": ('r', 'p', 'M', None, 'ARGININEAMIDE'),
  48.576 -"DT": ('T', 'd', None, None, '"THYMIDINE-5\'-MONOPHOSPHATE"'),
  48.577 -"DU": ('n', 'd', None, None, '"2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'),
  48.578 -"HV8": ('x', 'p', None, None, 'BENZYL-2-AMINO-PARAMETHOXY-BENZYLSTATINE'),
  48.579 -"MC1": ('s', 'p', 'M', None, '"METHICILLIN'),
  48.580 -"2FM": ('m', 'p', 'M', None, 'S-(DIFLUOROMETHYL)HOMOCYSTEINE'),
  48.581 -"CYS": ('C', 'p', None, None, 'CYSTEINE'),
  48.582 -"CYR": ('c', 'p', 'M', None, 'N~5~-[{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}(IMINIO)METHYL]-L-ORNITHINATE'),
  48.583 -"BIL": ('x', 'p', None, None, '"(3R,4S)-3-AMINO-4-METHYLHEXANOIC'),
  48.584 -"PCA": ('e', 'p', 'M', None, '"PYROGLUTAMIC'),
  48.585 -"GYC": ('x', 'p', None, None, '"[(4Z)-2-[(1R)-1-AMINO-2-MERCAPTOETHYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
  48.586 -"SMC": ('c', 'p', 'M', None, 'S-METHYLCYSTEINE'),
  48.587 -"0AC": ('g', 'p', 'M', None, '"(4S,5S)-5-AMINO-4-HYDROXY-6-PHENYLHEXANOIC'),
  48.588 -"BIF": ('f', 'p', 'M', None, '"(R)-2-AMINO-3-(4-PHENYLCYCLOHEXYL)PROPANOIC'),
  48.589 -"2BU": ('a', 'd', 'M', None, "(2S,3S)-N6-(2,3,4-TRIHYDROXYBUTYL)-2'-DEOXYADENOSINE MONO PHOSPHORIC ACID"),
  48.590 -"2BT": ('t', 'd', 'M', None, '"2\'-O-BUTYL-THYMIDINE"'),
  48.591 -"5PC": ('c', 'd', 'M', None, '"5(1-PROPYNYL)-2\'-DEOXYCYTIDINE-5\'-MONOPHOSPHATE"'),
  48.592 -"S2M": ('t', 'd', 'M', None, '"2\'-O-[2-(METHOXY)ETHYL]-2-THIOTHYMIDINE-5\'-MONOPHOSPHATE"'),
  48.593 -"5MC": ('c', 'r', 'M', None, '"5-METHYLCYTIDINE-5\'-MONOPHOSPHATE"'),
  48.594 -"S2P": ('a', 'p', 'M', None, '"(2S)-2-AMINO-3-(4-HYDROXY-1,2,5-THIADIAZOL-3-YL)PROPANOIC'),
  48.595 -"C1X": ('k', 'p', 'M', None, '(Z)-N~6~-[(4R,5S)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)DIHYDRO-2H-THIOPYRAN-3(4H)-YLIDENE]-L-LYSINE'),
  48.596 -"CYG": ('c', 'p', 'M', None, '"2-AMINO-4-(AMINO-3-OXO-PROPYLSULFANYLCARBONYL)-BUTYRIC'),
  48.597 -"CYF": ('c', 'p', 'M', None, '"5-[2-(2-AMINO-2-CARBAMOYL-ETHYLSULFANYL)-ACETYLAMINO]-2-(3,6-DIHYDROXY-9,9A-DIHYDRO-3H-XANTHEN-9-YL)-BENZOIC'),
  48.598 -"5PY": ('t', 'd', 'M', None, '"1-(2\'-DEOXY-5\'-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-METHYLPYRIMIDIN-2(1H)-ONE"'),
  48.599 -"CYJ": ('x', 'p', None, None, '(Z)-N~6~-[(4R,5S)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)PIPERIDIN-3-YLIDENE]-L-LYSINE'),
  48.600 -"BIU": ('i', 'p', 'M', None, '5-BROMO-L-ISOLEUCINE'),
  48.601 -"ENP": ('n', 'r', None, None, 'ETHENO-NADP'),
  48.602 -"AS9": ('x', 'p', None, None, '"N-[HYDROXY(METHYL)PHOSPHORYL]-L-ASPARTIC'),
  48.603 -"CYM": ('c', 'p', 'M', None, 'S-METHYLCYSTEINE'),
  48.604 -"CY3": ('c', 'p', 'M', None, '2-AMINO-3-MERCAPTO-PROPIONAMIDE'),
  48.605 -"DHN": ('v', 'p', 'M', None, '"5-HYDROXY'),
  48.606 -"CY1": ('c', 'p', 'M', None, 'ACETAMIDOMETHYLCYSTEINE'),
  48.607 -"CY0": ('c', 'p', 'M', None, 'S-{3-[(4-ANILINOQUINAZOLIN-6-YL)AMINO]-3-OXOPROPYL}-L-CYSTEINE'),
  48.608 -"SLZ": ('k', 'p', 'M', None, 'L-THIALYSINE'),
  48.609 -"CY4": ('c', 'p', 'M', None, 'S-BUTYRYL-CYSTEIN'),
  48.610 -"T5S": ('t', 'd', 'M', None, "2'-deoxy-5-(methylselanyl)uridine 5'-phosphate"),
  48.611 -"SLR": ('p', 'p', 'M', None, '(3R,4R)-3-HYDROXY-2-[(1S)-1-HYDROXY-2-METHYLPROPYL]-4-METHYL-5-OXO-D-PROLINE'),
  48.612 -"IML": ('i', 'p', 'M', None, 'N-METHYL-ISOLEUCINE'),
  48.613 -"32S": ('x', 'p', None, None, '"(S)-2-AMINO-3-(4H-SELENOLO[3,2-B]-PYRROL-6-YL)-PROPIONIC'),
  48.614 -"CSX": ('c', 'p', 'M', None, '"S-OXY'),
  48.615 -"MFC": ('x', 'p', None, None, '5-[1-(3-FLUORO-4-HYDROXY-PHENYL)-METH-(Z)-YLIDENE]-3,5-DIHYDRO-IMIDAZOL-4-ONE'),
  48.616 -"32T": ('x', 'p', None, None, '"(S)-2-AMINO-3-(4H-THIENO[3,2-B]-PYRROL-6-YL)-PROPIONIC'),
  48.617 -"TFT": ('t', 'd', 'M', None, '"(L)-ALPHA-THREOFURANOSYL-THYMINE-3\'-MONOPHOSPHATE"'),
  48.618 -"MLZ": ('k', 'p', 'M', None, 'N-METHYL-LYSINE'),
  48.619 -"DHU": ('u', 'r', 'M', None, '"5,6-DIHYDROURIDINE-5\'-PHOSPHATE"'),
  48.620 -"ASQ": ('d', 'p', 'M', None, 'PHOSPHOASPARTATE'),
  48.621 -"SLA": ('p', 'p', 'M', None, '(3S,4R)-3-HYDROXY-2-[(1S)-1-HYDROXY-2-METHYLPROPYL]-4-METHYL-5-OXO-D-PROLINE'),
  48.622 -"DHP": ('x', 'p', None, None, '3-DECYL-2,5-DIOXO-4-HYDROXY-3-PYRROLINE'),
  48.623 -"E1X": ('a', 'd', 'M', None, '"PHOSPHORIC'),
  48.624 -"XGL": ('g', 'd', 'M', None, '"[(1S,4R,6R)-6-HYDROXY-4-(GUANIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL'),
  48.625 -"MCL": ('k', 'p', 'M', None, 'NZ-(1-CARBOXYETHYL)-LYSINE'),
  48.626 -"TLB": ('n', 'r', None, None, '"2\'-O,3\'-C-METHYLENE-ARABINOFURANOSYL-THYMINE-5\'-MONOPHOSPHATE"'),
  48.627 -"MCG": ('x', 'p', None, None, '(S)-(ALPHA)-METHYL-4-CARBOXYPHENYLGLYCINE'),
  48.628 -"OTB": ('x', 'p', None, None, '"TERTIARY-BUTOXYFORMIC'),
  48.629 -"FT6": ('w', 'p', 'M', None, '6-FLUORO-L-TRYPTOPHAN'),
  48.630 -"XGA": ('n', 'd', None, None, '6-AMINO-3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-3,7-DIHYDRO-8H-IMIDAZO[4,5-G]QUINAZOLIN-8-ONE'),
  48.631 -"AA4": ('a', 'p', 'M', None, '"2-AMINO-5-HYDROXYPENTANOIC'),
  48.632 -"R": ('a', 'd', 'M', None, "2'-DEOXY-N6-(R)STYRENE OXIDE ADENOSINE MONOPHOSPHATE"),
  48.633 -"GFL": ('g', 'd', 'M', None, '2-AMINO-9-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)-1,9-DIHYDRO-6H-PURIN-6-ONE'),
  48.634 -"10C": ('c', 'r', 'M', None, '4-AMINO-1-{2,5-ANHYDRO-4-[(PHOSPHONOOXY)METHYL]-ALPHA-L-LYXOFURANOSYL}PYRIMIDIN-2(1H)-ONE'),
  48.635 -"NRQ": ('x', 'p', None, None, '"{(4Z)-4-(4-HYDROXYBENZYLIDENE)-2-[3-(METHYLTHIO)PROPANIMIDOYL]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'),
  48.636 -"XGU": ('g', 'd', 'M', None, '2-AMINO-9-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-1,9-DIHYDRO-6H-PURIN-6-ONE'),
  48.637 -"MCS": ('c', 'p', 'M', None, '"MALONYL'),
  48.638 -"D3": ('n', 'd', None, None, '1-(2-DEOXY-BETA-D-RIBOFURANOSYL)-4-(3-BENZAMIDO)PHENYLIMIDAZOLE'),
  48.639 -"PAQ": ('y', 'p', 'M', None, '2-OXY-4-HYDROXY-5-(2-HYDRAZINOPYRIDINE)PHENYLALANINE'),
  48.640 -"2PR": ('g', 'd', 'M', None, '"2-AMINO-9-[2-DEOXYRIBOFURANOSYL]-9H-PURINE-5\'-MONOPHOSPHATE"'),
  48.641 -"PAT": ('w', 'p', 'M', None, 'ALPHA-PHOSPHONO-TRYPTOPHAN'),
  48.642 -"PAU": ('a', 'p', 'M', None, '"PANTOTHENOIC'),
  48.643 -"CH": ('c', 'r', 'M', None, "N3-PROTONATED CYTIDINE-5'-MONOPHOSPHATE"),
  48.644 -"GCM": ('x', 'p', None, None, '"GLYCYLMETHYLENE'),
  48.645 -"CSZ": ('c', 'p', 'M', None, '"S-SELANYL'),
  48.646 -"GMS": ('g', 'd', 'M', None, '"2\'-DEOXYGUANOSINE-5\'-MONOSELENOPHOSPHATE"'),
  48.647 -"HBN": ('h', 'p', 'M', None, 'N-(2-NAPHTHYL)HISTIDINAMIDE'),
  48.648 -"TQQ": ('w', 'p', 'M', None, '"(S)-2-AMINO-3-(6,7-DIHYDRO-6-IMINO-7-OXO-1H-INDOL-3-YL)PROPANOIC'),
  48.649 -"2PI": ('x', 'p', None, None, '"2-AMINO-PENTANOIC'),
  48.650 -"GSU": ('e', 'p', 'M', None, '"O5\'-(L-GLUTAMYL-SULFAMOYL)-ADENOSINE"'),
  48.651 -"CSI": ('g', 'p', 'M', None, '"AMINO-(2-IMINO-HEXAHYDRO-PYRIMIDIN-4-YL)-ACETIC'),
  48.652 -"200": ('f', 'p', 'M', None, '4-CHLORO-L-PHENYLALANINE'),
  48.653 -"EXY": ('l', 'p', 'M', None, '6-[(2R)-OXIRAN-2-YL]-L-NORLEUCINE'),
  48.654 -"12A": ('a', 'r', 'M', None, '"2-METHYLTHIO-N6-(AMINOCARBONYL-L-THREONYL)-ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.655 -"DCL": ('x', 'p', None, None, '2-AMINO-4-METHYL-PENTAN-1-OL'),
  48.656 -"UNK": ('x', 'p', None, None, 'UNKNOWN'),
  48.657 -"DCI": ('x', 'p', None, None, '2-METHYL-BUTYLAMINE'),
  48.658 -"DCG": ('g', 'd', 'M', None, '"2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.659 -"4HT": ('w', 'p', 'M', None, '4-HYDROXYTRYPTOPHAN'),
  48.660 -"CSU": ('c', 'p', 'M', None, '"CYSTEINE-S-SULFONIC'),
  48.661 -"HSO": ('h', 'p', 'M', None, 'HISTIDINOL'),
  48.662 -"HSL": ('s', 'p', 'M', None, '"HOMOSERINE'),
  48.663 -"TLN": ('n', 'd', None, None, '"[(1R,3R,4R,7S)-7-HYDROXY-3-(THYMIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'),
  48.664 -"TZB": ('x', 'p', None, None, '"(4S)-2-[(1E)-1-AMINOPROP-1-ENYL]-4,5-DIHYDRO-1,3-THIAZOLE-4-CARBOXYLIC'),
  48.665 -"BTC": ('c', 'p', 'M', None, 'CYSTEINE'),
  48.666 -"DCY": ('c', 'p', 'M', None, 'D-CYSTEINE'),
  48.667 -"DCT": ('c', 'd', 'M', None, "2',3'-DIDEOXYCYTIDINE 5'-TRIPHOSPHATE"),
  48.668 -"4BF": ('y', 'p', 'M', None, '4-BROMO-L-PHENYLALANINE'),
  48.669 -"C2L": ('c', 'r', 'M', None, "5-METHYL-3'-O-METHOXYETHYL CYTIDINE 5'-MONOPHOSPHATE"),
  48.670 -"RIA": ('a', 'r', 'M', None, '"2\'-O-[(5\'-PHOSPHO)RIBOSYL]ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.671 -"IAM": ('a', 'p', 'M', None, '4-[(ISOPROPYLAMINO)METHYL]PHENYLALANINE'),
  48.672 -"B1F": ('f', 'p', 'M', None, '"PHENYLALANINE'),
  48.673 -"GPN": ('g', 'd', 'M', None, '2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-GUANINE'),
  48.674 -"C25": ('c', 'r', 'M', None, '"CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.675 -"N2C": ('x', 'p', None, None, 'N,S-DIMETHYLCYSTEINE'),
  48.676 -"HAC": ('a', 'p', 'M', None, 'BETA-CYCLOHEXYL-ALANINE'),
  48.677 -"MEG": ('e', 'p', 'M', None, '"(2S,3R)-3-METHYL-GLUTAMIC'),
  48.678 -"THO": ('x', 'p', None, None, '"REDUCED'),
  48.679 -"BNO": ('x', 'p', None, None, '"NORLEUCINE'),
  48.680 -"125": ('u', 'r', 'M', None, '"2\'-O-FLUOROETHYL-5-METHYL-URIDINE-5\'-MONOPHOSPHATE"'),
  48.681 -"126": ('u', 'r', 'M', None, '"2\'-O-METHYL-[TRI(OXYETHYL)]-5-METHYL-URIDINE-5\'-MONOPHOSPHATE"'),
  48.682 -"127": ('u', 'r', 'M', None, '"2\'-O-AMINOOXY-ETHYL-5-METHYL-URIDINE-5\'-MONOPHOSPHATE"'),
  48.683 -"128": ('n', 'r', None, None, '"SPIRO(2,4,6-TRINITROBENZENE[1,2A]-2O\',3O\'-METHYLENE-ADENINE-TRIPHOSPHATE"'),
  48.684 -"X": ('g', 'd', 'M', None, "2'-DEOXY-N7-(8,9-DIHYDRO-9-HYDROXY-10-DEHYDROXY-AFLATOXIN)GUANOSINE MONOPHOSPHATE"),
  48.685 -"HS8": ('h', 'p', 'M', None, '3-(1-SULFO-1H-IMIDAZOL-3-IUM-4-YL)-L-ALANINE'),
  48.686 -"NAM": ('a', 'p', 'M', None, '"NAM'),
  48.687 -"THC": ('t', 'p', 'M', None, 'N-METHYLCARBONYLTHREONINE'),
  48.688 -"HRP": ('w', 'p', 'M', None, '5-HYDROXY-L-TRYPTOPHAN'),
  48.689 -"MNV": ('v', 'p', 'M', None, '"N-METHYL-C-AMINO'),
  48.690 -"PHI": ('f', 'p', 'M', None, 'IODO-PHENYLALANINE'),
  48.691 -"PHM": ('f', 'p', 'M', None, 'PHENYLALANYLMETHANE'),
  48.692 -"PHL": ('f', 'p', 'M', None, 'L-PHENYLALANINOL'),
  48.693 -"CSW": ('c', 'p', 'M', None, 'CYSTEINE-S-DIOXIDE'),
  48.694 -"ZAD": ('a', 'r', 'M', None, '"(S)-1\'-(2\',3\'-DIHYDROXYPROPYL)-ADENINE"'),
  48.695 -"OIC": ('x', 'p', None, None, '"OCTAHYDROINDOLE-2-CARBOXYLIC'),
  48.696 -"PHE": ('F', 'p', None, None, 'PHENYLALANINE'),
  48.697 -"PHD": ('d', 'p', 'M', None, '"ASPARTYL'),
  48.698 -"SYS": ('c', 'p', 'M', None, '3-[(2-AMINO-2-OXOETHYL)SELANYL]-L-ALANINE'),
  48.699 -"BG1": ('s', 'p', 'M', None, 'O-[(2S)-2-{METHYL[(METHYLAMINO)SULFONYL]AMINO}PENTANOYL]-L-SERINE'),
  48.700 -"NIY": ('y', 'p', 'M', None, 'META-NITRO-TYROSINE'),
  48.701 -"MTR": ('t', 'd', 'M', None, '(5-METHYL-6-OXO-1,6-DIHYDRO-PYRIDIN-3-YL)-1,2-DIDEOXY-RIBOFURANOSE-5-MONOPHOSPHATE'),
  48.702 -"OAS": ('s', 'p', 'M', None, 'O-ACETYLSERINE'),
  48.703 -"BRU": ('n', 'd', None, None, '"5-BROMO-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'),
  48.704 -"NIT": ('x', 'p', None, None, '4-NITROANILINE'),
  48.705 -"OIP": ('n', 'd', None, None, "2'-DEOXY-INOSINIC ACID"),
  48.706 -"0A0": ('d', 'p', 'M', None, '"2-METHYL-L-ASPARTIC'),
  48.707 -"0A1": ('y', 'p', 'M', None, 'O-METHYL-L-TYROSINE'),
  48.708 -"0A2": ('k', 'p', 'M', None, '[(1R)-1,5-DIAMINOPENTYL][BIS(ETHANOLATO)]HYDROXYBORATE(1-)'),
  48.709 -"CRQ": ('x', 'p', None, None, '"[2-(3-CARBAMOYL-1-IMINO-PROPYL)-4-(4-HYDROXY-BENZYLIDENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETIC'),
  48.710 -"CRW": ('x', 'p', None, None, '"[2-(1-AMINOETHYL)-4-METHYLENE-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
  48.711 -"CRU": ('e', 'p', 'M', None, '"4-[(4Z)-1-(CARBOXYMETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-2-YL]-4-IMINOBUTANOIC'),
  48.712 -"DMH": ('n', 'p', 'M', None, 'N4,N4-DIMETHYL-ASPARAGINE'),
  48.713 -"PRR": ('a', 'p', 'M', None, '3-(METHYL-PYRIDINIUM)ALANINE'),
  48.714 -"CRX": ('x', 'p', None, None, '"[2-(1-AMINOETHYL)-2-HYDROXY-4-METHYLENE-5-OXOIMIDAZOLIDIN-1-YL]ACETIC'),
  48.715 -"DMK": ('d', 'p', 'M', None, '"3,3-DIMETHYL'),
  48.716 -"MHL": ('l', 'p', 'M', None, 'N-METHYL-4-HYDROXY-LEUCINE'),
  48.717 -"LAA": ('d', 'p', 'M', None, '(3R)-3-HYDROXY-L-ALPHA-ASPARAGINE'),
  48.718 -"0ZM": ('x', 'p', None, None, '"(2R)-2-{[(2R)-2-{[(S)-HYDROXY{[(2R,3S,4S,5R,6R)-3,4,5-TRIHYDROXY-6-METHYLTETRAHYDRO-2H-PYRAN-2-YL]OXY}PHOSPHORYL]AMINO}-4-METHYLPENTANOYL]AMINO}-3-(1H-INDOL-3-YL)PROPANOIC'),
  48.719 -"DMT": ('x', 'p', None, None, '"3-HYDROXY-4,4-DIMETHYL-2-(METHYLAMINO)-6-OCTENOIC'),
  48.720 -"CRG": ('x', 'p', None, None, '"[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(3H-IMIDAZOL-4-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETIC'),
  48.721 -"0ZJ": ('x', 'p', None, None, 'N-(SULFANYLACETYL)-D-PHENYLALANYL-N-[(1S)-4-CARBAMIMIDAMIDO-1-(CHLOROACETYL)BUTYL]-L-PROLINAMIDE'),
  48.722 -"0ZE": ('x', 'p', None, None, '"AMINO{[(4R)-4-({[(3R,6S,8AS)-6-AMINO-6-BENZYL-5-OXOOCTAHYDROINDOLIZIN-3-YL]CARBONYL}AMINO)-5-(1,3-BENZOTHIAZOL-2-YL)-5-OXOPENTYL]AMINO}METHANIMINIUM'),
  48.723 -"CRK": ('x', 'p', None, None, '4-{(Z)-[2-[3-(METHYLSULFANYL)PROPANOYL]-5-OXO-1-(2-OXOETHYL)-1,5-DIHYDRO-4H-IMIDAZOL-4-YLIDENE]METHYL}BENZENOLATE'),
  48.724 -"TPH": ('x', 'p', None, None, '"2-AMINO-3-PHENYL-PROPANE-1,1-DIOL'),
  48.725 -"1PI": ('x', 'p', None, None, '3-(1-CARBAMIMIDOYL-PIPERIDIN-3-YL)-L-ALANINE'),
  48.726 -"CRO": ('x', 'p', None, None, '"{2-[(1R,2R)-1-AMINO-2-HYDROXYPROPYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'),
  48.727 -"0ZC": ('x', 'p', None, None, '"(3R)-2-[N-(FURAN-2-YLCARBONYL)-L-LEUCYL]-2,3,4,9-TETRAHYDRO-1H-BETA-CARBOLINE-3-CARBOXYLIC'),
  48.728 -"MIS": ('s', 'p', 'M', None, 'MONOISOPROPYLPHOSPHORYLSERINE'),
  48.729 -"C36": ('c', 'd', 'M', None, '"5-METHYL-5-FLUORO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.730 -"NFA": ('f', 'p', 'M', None, '"PHENYLALANINE'),
  48.731 -"PU": ('a', 'r', 'M', None, '"PUROMYCIN-N-AMINOPHOSPHONIC'),
  48.732 -"G7M": ('g', 'r', 'M', None, '"N7-METHYL-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.733 -"S6G": ('g', 'd', 'M', None, '"6-THIO-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'),
  48.734 -"LDH": ('k', 'p', 'M', None, 'N~6~-ETHYL-L-LYSINE'),
  48.735 -"TCP": ('t', 'd', 'M', None, '"5\'-METHYLTHYMIDINE"'),
  48.736 -"TCQ": ('x', 'p', None, None, '3-((3E)-4-(HYDROXYMETHYL)-6-OXO-3-{[(1S,2R)-2-PHENYLCYCLOPROPYL]IMINO}CYCLOHEXA-1,4-DIEN-1-YL)ALANINE'),
  48.737 -"8AN": ('a', 'r', 'M', None, "3'-amino-3'-deoxyadenosine 5'-(dihydrogen phosphate)"),
  48.738 -"BUC": ('c', 'p', 'M', None, 'S,S-BUTYLTHIOCYSTEINE'),
  48.739 -"C32": ('c', 'd', 'M', None, '"5-BROMO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.740 -"7MG": ('g', 'r', 'M', None, '"7N-METHYL-8-HYDROGUANOSINE-5\'-MONOPHOSPHATE"'),
  48.741 -"BUG": ('l', 'p', 'M', None, '"TERT-LEUCYL'),
  48.742 -"LEU": ('L', 'p', None, None, 'LEUCINE'),
  48.743 -"MHO": ('m', 'p', 'M', None, 'S-OXYMETHIONINE'),
  48.744 -"C": ('C', 'r', None, None, '"CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.745 -"DDX": ('n', 'd', None, None, '"2\',3\'-DEHYDRO-2\',3\'-DIDEOXYRIBOFURANOSE-5\'-PHOSPHATE"'),
  48.746 -"OPR": ('r', 'p', 'M', None, 'C-(3-OXOPROPYL)ARGININE'),
  48.747 -"FOX": ('g', 'd', 'M', None, "((1R,2S,4R)-4-{[2-AMINO-5-(FORMYLAMINO)-6-OXO-3,6-DIHYDROPYRIMIDIN-4-YL]AMINO}-2-HYDROXYCYCLOPENTYL)METHYL 5'-PHOSPHATE"),
  48.748 -"MTY": ('y', 'p', 'M', None, 'META-TYROSINE'),
  48.749 -"TC1": ('c', 'd', 'M', None, '3-(5-PHOSPHO-2-DEOXY-BETA-D-RIBOFURANOSYL)-2-OXO-1,3-DIAZA-PHENOTHIAZINE'),
  48.750 -"FOE": ('c', 'p', 'M', None, '2-(2-AMINO-3-OXO-PROPYLSULFANYL)-N-(4-FLUORO-PHENYL)-N-ISOPROPYL-ACETAMIDE'),
  48.751 -"MF3": ('x', 'p', None, None, '"2-AMINO-4-TRIFLUOROMETHYLSULFANYL-BUTYRIC'),
  48.752 -"DDN": ('n', 'd', None, None, '"3,4-DIHYDRO-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'),
  48.753 -"FME": ('m', 'p', 'M', None, 'N-FORMYLMETHIONINE'),
  48.754 -"EFC": ('c', 'p', 'M', None, 'S,S-(2-FLUOROETHYL)THIOCYSTEINE'),
  48.755 -"DDG": ('g', 'd', 'M', None, '"2\',3\'-DIDEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.756 -"MHS": ('h', 'p', 'M', None, '"N1-METHYLATED'),
  48.757 -"DDE": ('h', 'p', 'M', None, '{3-[4-(2-AMINO-2-CARBOXY-ETHYL)-1H-IMIDAZOL-2-YL]-1-CARBAMOYL-PROPYL}-TRIMETHYL-AMMONIUM'),
  48.758 -"CR2": ('x', 'p', None, None, '"{(4Z)-2-(AMINOMETHYL)-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'),
  48.759 -"DSN": ('s', 'p', 'M', None, 'D-SERINE'),
  48.760 -"PSH": ('h', 'p', 'M', None, '1-THIOPHOSPHONO-L-HISTIDINE'),
  48.761 -"6MI": ('n', 'd', None, None, '6-METHYL-8-(2-DEOXY-RIBOFURANOSYL)ISOXANTHOPTERIDINE'),
  48.762 -"CR7": ('x', 'p', None, None, '"[(4Z)-2-[(1S)-1,5-DIAMINOPENTYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
  48.763 -"0AV": ('a', 'd', 'M', None, "2'-O-methyladenosine 5'-(dihydrogen phosphate)"),
  48.764 -"CR5": ('g', 'p', 'M', None, '(2R)-2-(AMINOMETHYL)-2,4-DIHYDROXY-5-OXO-3-(2-OXOETHYL)-2,5-DIHYDRO-1H-IMIDAZOL-3-IUM'),
  48.765 -"MOR": ('x', 'p', None, None, 'N-CARBONYLMORPHOLINE'),
  48.766 -"CR8": ('x', 'p', None, None, '2-[1-AMINO-2-(1H-IMIDAZOL-5-YL)ETHYL]-1-(CARBOXYMETHYL)-4-[(4-OXOCYCLOHEXA-2,5-DIEN-1-YLIDENE)METHYL]-1H-IMIDAZOL-5-OLATE'),
  48.767 -"PSA": ('f', 'p', 'M', None, '"3-HYDROXY-4-AMINO-5-PHENYLPENTANOIC'),
  48.768 -"ANI": ('x', 'p', None, None, '4-TRIFLUOROMETHYLANILINE'),
  48.769 -"NP3": ('n', 'd', None, None, '"1-[2-DEOXY-RIBOFURANOSYL]-1H-[3-NITRO-PYRROL]-5\'-PHOSPHATE"'),
  48.770 -"DM0": ('k', 'p', 'M', None, 'N~2~,N~2~,N~6~,N~6~-TETRAMETHYL-L-LYSINE'),
  48.771 -"0AA": ('v', 'p', 'M', None, '"METHYL'),
  48.772 -"0AB": ('v', 'p', 'M', None, '(3S,4S)-3-AMINO-4-METHYL-3,4-DIHYDRO-2H-PYRAN-2-ONE'),
  48.773 -"FOG": ('f', 'p', 'M', None, 'PHENYLALANINOYL-[1-HYDROXY]-2-PROPYLENE'),
  48.774 -"0AD": ('g', 'd', 'M', None, "2'-deoxy-N-propylguanosine 5'-(dihydrogen phosphate)"),
  48.775 -"LCC": ('n', 'd', None, None, '"[(1R,3R,4R,7S)-7-HYDROXY-3-(5-METHYLCYTOSIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'),
  48.776 -"NVA": ('v', 'p', 'M', None, 'NORVALINE'),
  48.777 -"0AG": ('l', 'p', 'M', None, 'N-(ETHOXYCARBONYL)-L-LEUCINE'),
  48.778 -"0AH": ('s', 'p', 'M', None, 'O-(BROMOACETYL)-L-SERINE'),
  48.779 -"1MA": ('a', 'r', 'M', None, '"6-HYDRO-1-METHYLADENOSINE-5\'-MONOPHOSPHATE"'),
  48.780 -"0AK": ('d', 'p', 'M', None, '"(2S)-2-AMINO-4-(2-CHLOROETHOXY)-4-OXOBUTANOIC'),
  48.781 -"0AM": ('a', 'd', 'M', None, "2'-deoxy-N-[3-(propyldisulfanyl)propyl]adenosine 5'-(dihydrogen phosphate)"),
  48.782 -"PST": ('t', 'd', 'M', None, '"THYMIDINE-5\'-THIOPHOSPHATE"'),
  48.783 -"PSU": ('u', 'r', 'M', None, '"PSEUDOURIDINE-5\'-MONOPHOSPHATE"'),
  48.784 -"1ZX": ('x', 'p', None, None, 'D-PHENYLALANYL-N-[(1S)-1-ACETYL-4-{[AMINO(IMINIO)METHYL]AMINO}BUTYL]-L-PROLINAMIDE'),
  48.785 -"ILG": ('e', 'p', 'M', None, '"GLUTAMYL'),
  48.786 -"ILE": ('I', 'p', None, None, 'ISOLEUCINE'),
  48.787 -"5CM": ('c', 'd', 'M', None, '"5-METHYL-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'),
  48.788 -"D11": ('t', 'p', 'M', None, 'D-PHOSHPHOTHREONINE'),
  48.789 -"UMS": ('n', 'd', None, None, '"2\'-METHYLSELENYL-2\'-DEOXYURIDINE-5\'-PHOSPHATE"'),
  48.790 -"X9Q": ('x', 'p', None, None, '"{(2S)-2-[(1S)-1-AMINOETHYL]-4-BENZYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'),
  48.791 -"FGP": ('s', 'p', 'M', None, '"2-AMINO-3-HYDROXY-3-PHOSPHONOOXY-PROPIONIC'),
  48.792 -"DVA": ('v', 'p', 'M', None, 'D-VALINE'),
  48.793 -"5ZA": ('x', 'p', None, None, '(5Z)-2-[(1S,2R)-1-AMINO-2-HYDROXYPROPYL]-5-[(4-AMINO-1H-INDOL-3-YL)METHYLENE]-3-(2-HYDROXYETHYL)-3,5-DIHYDRO-4H-IMIDAZOL-4-ONE'),
  48.794 -"BCC": ('x', 'p', None, None, '6-AMINO-6-BENZYL-5-OXO-OCTAHYDRO-INDOLIZINE-3-CARBALDEHYDE'),
  48.795 -"TMB": ('t', 'p', 'M', None, 'N-METHYL-4-[(E)-2-BUTENYL]-4,N-DIMETHYL-THREONINE'),
  48.796 -"3AH": ('h', 'p', 'M', None, '[HISTIDIN-1-YL-4H-[1,2,4]TRIAZOL-5-YL]-AMINE'),
  48.797 -"BCX": ('c', 'p', 'M', None, 'BETA-3-CYSTEINE'),
  48.798 -"KPI": ('k', 'p', 'M', None, '"(2S)-2-AMINO-6-[(1-HYDROXY-1-OXO-PROPAN-2-YLIDENE)AMINO]HEXANOIC'),
  48.799 -"A2L": ('a', 'r', 'M', None, "3'-O-METHYOXYETHYL-ADENOSINE 5'-MONOPHOSPHATE"),
  48.800 -"A2M": ('a', 'r', 'M', None, '"2\'-O-METHYL-ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.801 -"ILX": ('i', 'p', 'M', None, '4,5-DIHYDROXYISOLEUCINE'),
  48.802 -"PEC": ('c', 'p', 'M', None, 'S,S-PENTYLTHIOCYSTEINE'),
  48.803 -"IGL": ('g', 'p', 'M', None, '"ALPHA-AMINO-2-INDANACETIC'),
  48.804 -"OLE": ('x', 'p', None, None, '"2-HYDROXY-4-METHYL-PENTANOIC'),
  48.805 -"LPG": ('g', 'p', 'M', None, 'L-PROPARGYLGLYCINE'),
  48.806 -"DI": ('n', 'd', None, None, '"2\'-DEOXYINOSINE-5\'-MONOPHOSPHATE"'),
  48.807 -"ALO": ('t', 'p', 'M', None, 'ALLO-THREONINE'),
  48.808 -"OLT": ('t', 'p', 'M', None, 'O-METHYL-L-THREONINE'),
  48.809 -"TBM": ('t', 'p', 'M', None, '4-METHYL-4-[(E)-2-BUTENYL]-4,N-DIMETHYL-THREONINE'),
  48.810 -"2AU": ('u', 'r', 'M', None, '"2\'-AMINOURIDINE"'),
  48.811 -"LED": ('l', 'p', 'M', None, '(4R)-5-OXO-L-LEUCINE'),
  48.812 -"OLZ": ('s', 'p', 'M', None, 'O-(2-AMINOETHYL)-L-SERINE'),
  48.813 -"C6C": ('c', 'p', 'M', None, '"S-CYCLOHEXYL'),
  48.814 -"IEY": ('x', 'p', None, None, '2-((1E)-2-(5-IMIDAZOLYL)ETHENYL)-4-(P-HYDROXYBENZYLIDENE)-5-IMIDAZOLINONE'),
  48.815 -"HEU": ('n', 'd', None, None, '"3-(2-HYDROXYETHYL)-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'),
  48.816 -"SRA": ('a', 'r', 'M', None, "ADENOSINE -5'-THIO-MONOPHOSPHATE"),
  48.817 -"ZTH": ('n', 'r', None, None, '"(S)-1\'-(2\',3\'-DIHYDROXYPROPYL)-THYMINE"'),
  48.818 -"NDN": ('n', 'd', None, None, "2'-DEOXY-5-NITROURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
  48.819 -"C66": ('x', 'p', None, None, '2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-CYTOSINE'),
  48.820 -"6CW": ('w', 'p', 'M', None, '6-CHLORO-L-TRYPTOPHAN'),
  48.821 -"CR0": ('x', 'p', None, None, '[2-(1-AMINO-2-HYDROXYPROPYL)-2-HYDROXY-4-ISOBUTYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETALDEHYDE'),
  48.822 -"DBM": ('n', 'r', None, None, '9-(6-DEOXY-BETA-D-ALLOFURANOSYL)-6-METHYLPURINE'),
  48.823 -"N": ('n', 'r', None, None, "ANY 5'-MONOPHOSPHATE NUCLEOTIDE"),
  48.824 -"N6G": ('g', 'r', 'M', None, '"((2R,3S,4R,5S)-5-(2,6-DIAMINO-9H-PURIN-9-YL)-3,4-DIHYDROXY-TETRAHYDROFURAN-2-YL)METHYL'),
  48.825 -"ALC": ('a', 'p', 'M', None, '"2-AMINO-3-CYCLOHEXYL-PROPIONIC'),
  48.826 -"S1H": ('s', 'p', 'M', None, '1-HEXADECANOSULFONYL-O-L-SERINE'),
  48.827 -"FZN": ('k', 'p', 'M', None, '(2S)-2-amino-6-{[(1Z)-1-{[(2R,3R,4S,5R)-5-({[(R)-{[(R)-{[(2R,3S,4R,5R)-5-(6-amino-9H-purin-9-yl)-3,4-dihydroxytetrahydrofuran-2-yl]methoxy}(hydroxy)phosphoryl]oxy}(hydroxy)phosphoryl]oxy}methyl)-3,4-dihydroxytetrahydrofuran-2-yl]sulfanyl}ethylidene]amino}hexanoic acid'),
  48.828 -"0SP": ('a', 'd', 'M', None, "2'-deoxy-N-[3-(propyldisulfanyl)propyl]adenosine 5'-(dihydrogen phosphate)"),
  48.829 -"OBS": ('x', 'p', None, None, '(Z)-N^6-[(4S,5R)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)-1-HYDROXYDIHYDRO-2H-THIOPYRANIUM-3(4H)-YLIDENE]-L-LYSINE'),
  48.830 -"2NT": ('t', 'd', 'M', None, "2'-O-[2-[HYDROXY(METHYLENEAMINO)OXY]ETHYL THYMIDINE-5'-MONOPHOSPHATE"),
  48.831 -"K1R": ('c', 'p', 'M', None, '"(2S)-2-AMINO-4-[({[(2R)-2-AMINO-2-CARBOXYETHYL]THIO}AMINO)SULFINYL]BUTANOIC'),
  48.832 -"NMT": ('t', 'd', 'M', None, '1-(O2-(METHYLCARBAMOYL)-O5-HYDROXYPHOSPHINYL-BETA-D-RIBOFURANOSYL)THYMINE'),
  48.833 -"A23": ('a', 'r', 'M', None, "ADENOSINE-5'-PHOSPHATE-2',3'-CYCLIC PHOSPHATE"),
  48.834 -"D1P": ('n', 'd', None, None, '"2\'-DEOXY-RIBOFURANOSE-5\'-PHOSPHATE"'),
  48.835 -"NMS": ('t', 'd', 'M', None, '1-(O2-(2-METHYLAMINO-2-OXO-ETHYL)-O5-HYDROXYPHOSPHINYL-BETA-D-RIBOFURANOSYL)THYMINE'),
  48.836 -"CIR": ('r', 'p', 'M', None, 'CITRULLINE'),
  48.837 -"CH6": ('x', 'p', None, None, '"{(4Z)-2-[(1S)-1-AMINO-3-(METHYLSULFANYL)PROPYL]-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'),
  48.838 -"BJH": ('x', 'p', None, None, '"1(R)-1-ACETAMIDO-2-(3-CARBOXY-2-HYDROXYPHENYL)ETHYL'),
  48.839 -"YYG": ('g', 'r', 'M', None, '"4-(3-[5-O-PHOSPHONORIBOFURANOSYL]-4,6-DIMETHYL-8-OXO-4,8-DIHYDRO-3H-1,3,4,5,7A-PENTAAZA-S-INDACEN-YLAMINO-BUTYRIC'),
  48.840 -"2MA": ('a', 'r', 'M', None, '"2-METHYLADENOSINE-5\'-MONOPHOSPHATE"'),
  48.841 -"NMC": ('g', 'p', 'M', None, '"N-CYCLOPROPYLMETHYL'),
  48.842 -"0AY": ('x', 'p', None, None, '"DIETHYL'),
  48.843 -"UFP": ('n', 'd', None, None, '"5-FLUORO-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'),
  48.844 -"SET": ('s', 'p', 'M', None, 'AMINOSERINE'),
  48.845 -"UFR": ('n', 'd', None, None, "2'-DEOXY-5-FORMYLURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
  48.846 -"NMM": ('r', 'p', 'M', None, '"(R)-2-AMINO-5-(3-METHYLGUANIDINO)BUTANOIC'),
  48.847 -"UFT": ('n', 'd', None, None, "2'-deoxy-2'-fluorouridine 5'-(dihydrogen phosphate)"),
  48.848 -"0AZ": ('p', 'p', 'M', None, '(4R)-4-HYDROXY-L-PROLINE'),
  48.849 -"5IT": ('t', 'd', 'M', None, '"5-IODO-THYMIDINE-5\'-PHOSPHATE"'),
  48.850 -"DIL": ('i', 'p', 'M', None, 'D-ISOLEUCINE'),
  48.851 -"FHL": ('x', 'p', None, None, '(E)-N~6~-[3-CARBOXY-1-(HYDROXYMETHYL)PROPYLIDENE]-L-LYSINE'),
  48.852 -"0Z6": ('x', 'p', None, None, 'D-PHENYLALANYL-N-[(1S)-4-{[AMINO(IMINIO)METHYL]AMINO}-1-(CHLOROACETYL)BUTYL]-L-PHENYLALANINAMIDE'),
  48.853 -"AR4": ('e', 'p', 'M', None, '"2-AMINO-5-(3-FLUORO-3,4-DIHYDROXY-5-HYDROXYMETHYL-TETRAHYDRO-FURAN-2-YLOXY)-5-HYDROXY-PENTANOIC'),
  48.854 -"EIT": ('t', 'd', 'M', None, '"((3R,4R,5R)-4-(2-(1H-IMIDAZOL-1-YL)ETHOXY)-3-HYDROXY-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)-TETRAHYDROFURAN-2-YL)METHYL'),
  48.855 -"SD2": ('x', 'p', None, None, 'N-(SULFANYLACETYL)TYROSYLPROLYLMETHIONINAMIDE'),
  48.856 -"CH7": ('x', 'p', None, None, '"[(4Z)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-2-(3,4,5,6-TETRAHYDROPYRIDIN-2-YL)-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
  48.857 -"N5I": ('n', 'd', None, None, '1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-NITRO-1H-INDOLE'),
  48.858 -"5AT": ('t', 'd', 'M', None, '"5\'-AMINO-5\'-DEOXYTHYMIDINE"'),
  48.859 -"LOL": ('x', 'p', None, None, 'LEUCINOL'),
  48.860 -"HYP": ('p', 'p', 'M', None, '4-HYDROXYPROLINE'),
  48.861 -"IYT": ('t', 'p', 'M', None, 'N-ALPHA-ACETYL-3,5-DIIODOTYROSYL-D-THREONINE'),
  48.862 -"LOV": ('x', 'p', None, None, '"5-AMINO-4-HYDROXY-2-ISOPROPYL-7-METHYL-OCTANOIC'),
  48.863 -"LCX": ('k', 'p', 'M', None, '"CARBOXYLATED'),
  48.864 -"GND": ('x', 'p', None, None, '"2-AMINO-5-GUANIDINO-PENTANOIC'),
  48.865 -"GNE": ('n', 'd', None, None, '1,N2-ETHENOGUANINE'),
  48.866 -"FHU": ('u', 'r', 'M', None, '"(5S,6R)-5-FLUORO-6-HYDROXY-PSEUDOURIDINE-5\'-MONOPHOSPHATE"'),
  48.867 -"C12": ('x', 'p', None, None, '2-(1-AMINO-2-HYDROXYPROPYL)-4-(4-HYDROXYBENZYL)-1-(2-OXOETHYL)-1H-IMIDAZOL-5-OLATE'),
  48.868 -"DIV": ('v', 'p', 'M', None, 'D-ISOVALINE'),
  48.869 -"T6A": ('a', 'r', 'M', None, '"N-[N-(9-B-D-RIBOFURANOSYLPURIN-6-YL)CARBAMOYL]THREONINE-5\'-MONOPHOSPHATE"'),
  48.870 -"DIR": ('r', 'p', 'M', None, '3-{[(E)-AMINO(HYDROXYIMINO)METHYL]AMINO}ALANINE'),
  48.871 -"AIB": ('a', 'p', 'M', None, '"ALPHA-AMINOISOBUTYRIC'),
  48.872 -"SOC": ('c', 'p', 'M', None, 'DIOXYSELENOCYSTEINE'),
  48.873 -"NSK": ('x', 'p', None, None, '"N-SUCCINYL'),
  48.874 -"LCG": ('g', 'd', 'M', None, '"[(1R,3R,4R,7S)-7-HYDROXY-3-(GUANIN-9-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'),
  48.875 -"MT2": ('m', 'p', 'M', None, '[(3S)-3-AMINO-3-CARBOXYPROPYL](ETHYL)METHYLSULFONIUM'),
  48.876 -"GAU": ('e', 'p', 'M', None, '"(4S)-4-AMINO-5-HYDROXYPENTANOIC'),
  48.877 -"PPH": ('l', 'p', 'M', None, '"PHENYLALANINE'),
  48.878 -"PPN": ('f', 'p', 'M', None, 'PARA-NITROPHENYLALANINE'),
  48.879 -"1MG": ('g', 'r', 'M', None, '"1N-METHYLGUANOSINE-5\'-MONOPHOSPHATE"'),
  48.880 -"GAO": ('g', 'r', 'M', None, "GUANINE ARABINOSE-5'-PHOSPHATE"),
  48.881 -"FAI": ('n', 'r', None, None, '5-(FORMYLAMINO)-1-(5-O-PHOSPHONO-BETA-D-RIBOFURANOSYL)-1H-IMIDAZOLE-4-CARBOXAMIDE'),
  48.882 -"PPW": ('g', 'd', 'M', None, '"7-DEAZA-8-AZA-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'),
  48.883 -"2ML": ('l', 'p', 'M', None, '2-METHYLLEUCINE'),
  48.884 -"MBZ": ('n', 'd', None, None, '"1-[2-DEOXYRIBOFURANOSYL]-4-METHYL-BENZOIMIDAZOLE-5\'-MONOPHOSPHATE"'),
  48.885 -"TPN": ('t', 'd', 'M', None, '2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE'),
  48.886 -"SOY": ('s', 'p', 'M', None, '"OXACILLIN-ACYLATED'),
  48.887 -"CBV": ('c', 'r', 'M', None, "5-BROMOCYTIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
  48.888 -"MBQ": ('y', 'p', 'M', None, '2-HYDROXY-5-({1-[(4-METHYLPHENOXY)METHYL]-3-OXOPROP-1-ENYL}AMINO)-L-TYROSINE'),
  48.889 -"G31": ('g', 'd', 'M', None, '"3\'-METHYL-2\',3\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.890 -"HTI": ('c', 'p', 'M', None, '(4S)-4-{[(2S)-2-AMINO-3-OXOPROPYL]SULFANYL}-L-HOMOSERINE'),
  48.891 -"XX1": ('k', 'p', 'M', None, 'N~6~-7H-PURIN-6-YL-L-LYSINE'),
  48.892 -"6OG": ('g', 'd', 'M', None, "6-O-METHYL GUANOSINE-5'-MONOPHOSPHATE"),
  48.893 -"0AF": ('w', 'p', 'M', None, '7-HYDROXY-L-TRYPTOPHAN'),
  48.894 -"G36": ('g', 'd', 'M', None, '"O6-ETHYL-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'),
  48.895 -"I": ('n', 'r', None, None, '"INOSINIC'),
  48.896 -"YOF": ('y', 'p', 'M', None, '3-FLUOROTYROSINE'),
  48.897 -"HPQ": ('f', 'p', 'M', None, 'HOMOPHENYLALANINYLMETHANE'),
  48.898 -"LCA": ('a', 'r', 'M', None, '"[(1R,3R,4R,7S)-7-HYDROXY-3-(ADENIN-9-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'),
  48.899 -"IU": ('u', 'r', 'M', None, '"5-IODOURIDINE-5\'-MONOPHOSPHATE"'),
  48.900 -"CYQ": ('c', 'p', 'M', None, '"2-AMINO-3-PHOSPHONOMETHYLSULFANYL-PROPIONIC'),
  48.901 -"M0H": ('c', 'p', 'M', None, 'S-(HYDROXYMETHYL)-L-CYSTEINE'),
  48.902 -"FA5": ('n', 'r', None, None, '"ADENOSINE-5\'-[PHENYLALANINYL-PHOSPHATE]"'),
  48.903 -"LVG": ('g', 'p', 'M', None, 'L-VINYLGLYCINE'),
  48.904 -"HPH": ('x', 'p', None, None, 'PHENYLALANINDIOL'),
  48.905 -"IRN": ('n', 'r', None, None, '1-(5-O-PHOSPHONO-BETA-D-RIBOFURANOSYL)-1H-IMIDAZOLE'),
  48.906 -"Y": ('a', 'd', 'M', None, "2'-DEOXY-N6-(S)STYRENE OXIDE ADENOSINE MONOPHOSPHATE"),
  48.907 -"IC": ('c', 'r', 'M', None, '"ISOCYTIDINE-5\'-MONOPHOSPHATE"'),
  48.908 -"4DP": ('w', 'p', 'M', None, '3-[5-(DIMETHYLAMINO)-1,3-DIOXO-1,3-DIHYDRO-2H-ISOINDOL-2-YL]-L-ALANINE'),
  48.909 -"HPE": ('f', 'p', 'M', None, 'HOMOPHENYLALANINE'),
  48.910 -"PR5": ('a', 'r', 'M', None, "PURINE RIBOSIDE-5'-MONOPHOSPHATE"),
  48.911 -"CDE": ('x', 'p', None, None, '1,2-DIMETHYL-PROPYLAMINE'),
  48.912 -"SDP": ('s', 'p', 'M', None, '"2-AMINO-3-(DIETHOXY-PHOSPHORYLOXY)-PROPIONIC'),
  48.913 -"GN7": ('g', 'd', 'M', None, '"N7-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'),
  48.914 -"ARV": ('x', 'p', None, None, '5-N-ALLYL-ARGININE'),
  48.915 -"2AT": ('t', 'd', 'M', None, "2'-O-ALLYL THYMIDINE-5'-MONOPHOSPHATE"),
  48.916 -"T66": ('x', 'p', None, None, '2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-THYMINE'),
  48.917 -"LCK": ('k', 'p', 'M', None, '(Z)-N~6~-(2-CARBOXY-1-METHYLETHYLIDENE)-L-LYSINE'),
  48.918 -"EHP": ('f', 'p', 'M', None, '3-HYDROXYPHENYLALANINE'),
  48.919 -"CHP": ('g', 'p', 'M', None, '3-CHLORO-4-HYDROXYPHENYLGLYCINE'),
  48.920 -"HY3": ('p', 'p', 'M', None, '3-HYDROXYPROLINE'),
  48.921 -"THX": ('n', 'd', None, None, 'PHOSPHONIC ACID 6-({6-[6-(6-CARBAMOYL-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONYL)-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDOCENE-2-CARBONYL]-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONL}-AMINO)-HEXYL ESTER 5-(5-METHYL-2,4-DIOXO-3,4-DIHYDRO-2H-PYRIMIDIN-1-YL)-TETRAHYDRO-FURAN-2-YLMETHYL ESTER'),
  48.922 -"LCH": ('n', 'd', None, None, '"[(1R,3R,4R,7S)-7-HYDROXY-3-(5-METHYLCYTOSIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'),
  48.923 -"4MF": ('n', 'd', None, None, '1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-4-METHYL-1H-INDOLE'),
  48.924 -"DTR": ('w', 'p', 'M', None, 'D-TRYPTOPHAN'),
  48.925 -"MOD": ('x', 'p', None, None, '"L-METHIONYL'),
  48.926 -"4MM": ('x', 'p', None, None, '(1S)-1-CARBOXY-N,N,N-TRIMETHYL-3-(METHYLSULFANYL)PROPAN-1-AMINIUM'),
  48.927 -"ARO": ('r', 'p', 'M', None, '"C-GAMMA-HYDROXY'),
  48.928 -"GSS": ('g', 'd', 'M', None, "2'-DEOXY-N2-(S)STYRENE OXIDE GUANOSINE MONOPHOSPHATE"),
  48.929 -"DRZ": ('n', 'd', None, None, '"3\',4\'-DIHYDROXY-PENTANAL-5\'-PHOSPHATE"'),
  48.930 -"CCY": ('x', 'p', None, None, '2-(1-AMINO-2-MERCAPTO-ETHYL)-5-(4-HYDROXY-BENZYL)-3-(ETHANOYL)-3,5-DIHYDRO-IMIDAZOL-4-ONE'),
  48.931 -"TPQ": ('y', 'p', 'M', None, '5-(2-CARBOXY-2-AMINOETHYL)-2-HYDROXY-1,4-BENZOQUINONE'),
  48.932 -"5CS": ('c', 'p', 'M', None, '"2-AMINO-3-(CYSTEIN-S-YL)-ISOXAZOLIDIN-5-YL-ACETIC'),
  48.933 -"4OC": ('c', 'r', 'M', None, '"4N,O2\'-METHYLCYTIDINE-5\'-MONOPHOSPHATE"'),
  48.934 -"DRP": ('n', 'd', None, None, '"2-DEOXYRIBOFURANOSYL-PYRIDINE-5\'-MONOPHOSPHATE"'),
  48.935 -"BOR": ('r', 'p', 'M', None, '"(1R)-1-AMINO-4-{[(E)-AMINO(IMINO)METHYL]AMINO}BUTYLBORONIC'),
  48.936 -"DRT": ('t', 'd', 'M', None, "2'-DEOXY-L-RIBO-FURANOSYL THYMINE-5'-MONOPHOSPHATE"),
  48.937 -"CCS": ('c', 'p', 'M', None, '"CARBOXYMETHYLATED'),
  48.938 -"TS": ('n', 'd', None, None, '"THYMIDINE-5\'-THIOPHOSPHATE"'),
  48.939 -"MME": ('m', 'p', 'M', None, '"N-METHYL'),
  48.940 -"CCL": ('k', 'p', 'M', None, 'N~6~-[(CYCLOPENTYLOXY)CARBONYL]-D-LYSINE'),
  48.941 -"GSC": ('g', 'p', 'M', None, '"2-ETHYLTHIO'),
  48.942 -"DRM": ('n', 'd', None, None, '"{[(1R,2S)-2-(2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)CYCLOPENTYL]OXY}METHYLPHOSPHONIC'),
  48.943 -"4FB": ('p', 'p', 'M', None, '(4S)-4-FLUORO-L-PROLINE'),
  48.944 -"GYS": ('x', 'p', None, None, '"[(4Z)-2-(1-AMINO-2-HYDROXYETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
  48.945 -"NAL": ('a', 'p', 'M', None, 'BETA-(2-NAPHTHYL)-ALANINE'),
  48.946 -"6MT": ('a', 'r', 'M', None, '"TRANS-N6-METHYL-DEOXY-ADENOSINE-5\'-MONOPHOSPHATE"'),
  48.947 -"CCC": ('c', 'r', 'M', None, "CYTIDINE-5'-PHOSPHATE-2',3'-CYCLIC PHOSPHATE"),
  48.948 -"UMP": ('u', 'r', 'M', None, "2'-DEOXYURIDINE 5'-MONOPHOSPHATE"),
  48.949 -"ML3": ('k', 'p', 'M', None, '2-{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}-N,N,N-TRIMETHYLETHANAMINIUM'),
  48.950 -"LPD": ('p', 'p', 'M', None, 'L-PROLINAMIDE'),
  48.951 -"OHI": ('h', 'p', 'M', None, '3-(2-OXO-2H-IMIDAZOL-4-YL)-L-ALANINE'),
  48.952 -"LYR": ('k', 'p', 'M', None, 'N~6~-[(2Z,4E,6E,8E)-3,7-DIMETHYL-9-(2,6,6-TRIMETHYLCYCLOHEX-1-EN-1-YL)NONA-2,4,6,8-TETRAENYL]LYSINE'),
  48.953 -"LYS": ('K', 'p', None, None, 'LYSINE'),
  48.954 -"LYX": ('k', 'p', 'M', None, "N''-(2-COENZYME A)-PROPANOYL-LYSINE"),
  48.955 -"CYA": ('c', 'p', 'M', None, '"TWO'),
  48.956 -"LYZ": ('k', 'p', 'M', None, '5-HYDROXYLYSINE'),
  48.957 -"ODP": ('n', 'r', None, None, '"4-OXO-NICOTINAMIDE-ADENINE'),
  48.958 -"C3Y": ('c', 'p', 'M', None, 'S-[(1S)-1-HYDROXY-1-(HYDROXYAMINO)ETHYL]-L-CYSTEINE'),
  48.959 -"POM": ('p', 'p', 'M', None, 'CIS-5-METHYL-4-OXOPROLINE'),
  48.960 -"LYM": ('k', 'p', 'M', None, 'DEOXY-METHYL-LYSINE'),
  48.961 -"LYN": ('k', 'p', 'M', None, '"2,6-DIAMINO-HEXANOIC'),
  48.962 -"GLQ": ('e', 'p', 'M', None, '"4-AMINO-5-OXO-PENTANOIC'),
  48.963 -"PIV": ('x', 'p', None, None, '"PIVALIC'),
  48.964 -"OHS": ('d', 'p', 'M', None, 'O-(CARBOXYSULFANYL)-4-OXO-L-HOMOSERINE'),
  48.965 -"X9A": ('x', 'p', None, None, 'amino[(3-{(2Z,5S,6S,9R,12S,13R,16R)-5,16-dicarboxy-2-ethylidene-12-[(1E,3E,5S,6R)-6-methoxy-3,5-dimethyl-7-phenylhepta-1,3-dien-1-yl]-1,6,13-trimethyl-3,7,10,14,19-pentaoxo-1,4,8,11,15-pentaazacyclononadecan-9-yl}propyl)amino]methaniminium'),
  48.966 -"SMT": ('n', 'r', None, None, '"2\'-[(METHYLTHIO)ETHYLOXY]-THYMIDINE-5\'-MONOPHOSPHATE"'),
  48.967 -"XAR": ('n', 'd', None, None, '"[(1R,4S,6S)-4-(6-AMINO-9H-PURIN-9-YL)-6-HYDROXYCYCLOHEX-2-EN-1-YL]METHYL'),
  48.968 -"CYD": ('c', 'p', 'M', None, '"2-AMINO-6-(CYSTEIN-S-YL)-5-OXO-HEXANOIC'),
  48.969 -"SVA": ('s', 'p', 'M', None, '"SERINE'),
  48.970 -"5HT": ('t', 'd', 'M', None, '5-HYDROXY-THYMIDINE'),
  48.971 -"5HU": ('n', 'd', None, None, '"5-HYDROXYMETHYLURIDINE-2\'-DEOXY-5\'-MONOPHOSPHATE"'),
  48.972 -"XAL": ('a', 'd', 'M', None, '"[(1S,4R,6R)-6-HYDROXY-4-(ADENIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL'),
  48.973 -"T": ('n', 'd', None, None, '"THYMIDINE-5\'-MONOPHOSPHATE"'),
  48.974 -"5HP": ('e', 'p', 'M', None, '"PYROGLUTAMIC'),
  48.975 -"4SC": ('c', 'd', 'M', None, "4'-THIO-2'-DEOXYCYTIDINE-5'-MONOPHOSPHATE GROUP"),
  48.976 -"CQR": ('x', 'p', None, None, '"[(4Z)-2-(AMINOMETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'),
  48.977 -"XAD": ('a', 'd', 'M', None, '9-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-9H-PURIN-6-AMINE'),
  48.978 -"XAE": ('n', 'd', None, None, '3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-3H-IMIDAZO[4,5-G]QUINAZOLIN-8-AMINE'),
  48.979 -"TYI": ('y', 'p', 'M', None, '3,5-DIIODOTYROSINE'),
  48.980 -"VLL": ('x', 'p', None, None, '"(2S)-2,3-DIAMINOBUTANOIC'),
  48.981 -"2BD": ('n', 'd', None, None, "N1-(1-HYDROXY-3-BUTEN-2(S)-YL)-2'-DEOXYINOSINE MONO PHOSPHORIC ACID"),
  48.982 -"IIC": ('x', 'p', None, None, '"4-IMIDAZOLMETHYLENE-5-IMIDAZOLONE'),
  48.983 -"A66": ('x', 'p', None, None, '2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-ADENINE'),
  48.984 -"2EG": ('g', 'd', 'M', None, "2'-DEOXY-N-ETHYLGUANOSINE 5'-PHOSPHATE"),
  48.985 -"2SA": ('n', 'r', None, None, '"2-[9-(3,4-DIHYDROXY-5-PHOSPHONOOXYMETHYL-TETRAHYDRO-FURAN-2-YL)-9H-PURIN-6-YLAMINO]-SUCCINIC'),
  48.986 -"2CO": ('c', 'p', 'M', None, 'S-HYDROPEROXYCYSTEINE'),
  48.987 -"TMD": ('t', 'p', 'M', None, '(6,7-DIHYDRO)4-[(E)-BUTENYL]-4,N-DIMETHYL-THREONINE'),
  48.988 -"CXM": ('m', 'p', 'M', None, 'N-CARBOXYMETHIONINE'),
  48.989 -"ALA": ('A', 'p', None, None, 'ALANINE'),
  48.990 -"GOM": ('g', 'r', 'M', None, 'GLUTAMOL-AMP'),
  48.991 -"2VA": ('v', 'p', 'M', None, '"2\'-(L-VALYL)AMINO-2\'-DEOXYADENOSINE"'),
  48.992 -"5BU": ('u', 'r', 'M', None, '"5-BROMO-URIDINE-5\'-MONOPHOSPHATE"'),
  48.993 -"IGU": ('g', 'd', 'M', None, '"2\'-DEOXYISOGUANINE-5\'-MONOPHOSPHATE"'),
  48.994 -"MLE": ('l', 'p', 'M', None, 'N-METHYLLEUCINE'),
  48.995 -"T39": ('n', 'r', None, None, "2'-O-METHOXYETHYLENE THYMIDINE 5'-MONOPHOSPHATE"),
  48.996 -"FLT": ('y', 'p', 'M', None, '"FLUOROMALONYL'),
  48.997 -"U37": ('u', 'r', 'M', None, "URIDINE 5'-MONOTHIOPHOSPHATE"),
  48.998 -"U36": ('u', 'r', 'M', None, '"2\'-O-METHYOXYETHYL-URIDINE-5\'-MONOPHOSPHATE"'),
  48.999 -"DHL": ('x', 'p', None, None, '2-AMINO-ETHANETHIOL'),
 48.1000 -"U33": ('n', 'd', None, None, "5-BROMO-2'-DEOXY URIDINE"),
 48.1001 -"TIH": ('a', 'p', 'M', None, 'BETA(2-THIENYL)ALANINE'),
 48.1002 -"SMF": ('f', 'p', 'M', None, '4-SULFOMETHYL-L-PHENYLALANINE'),
 48.1003 -"FLA": ('a', 'p', 'M', None, 'TRIFLUOROALANINE'),
 48.1004 -"HOB": ('n', 'd', None, None, '"CHOLEST-5-EN-3-YL'),
 48.1005 -"FLE": ('l', 'p', 'M', None, 'FUROYL-LEUCINE'),
 48.1006 -"VDL": ('x', 'p', None, None, '"(2R,3R)-2,3-DIAMINOBUTANOIC'),
 48.1007 -"A1P": ('n', 'd', None, None, '9-{2-DEOXY-5-O-[HYDROXY(OXIDO)PHOSPHINO]-BETA-L-ERYTHRO-PENTOFURANOSYL}-9H-PURIN-2-AMINE'),
 48.1008 -"0G6": ('x', 'p', None, None, 'D-PHENYLALANYL-N-[(1S)-4-{[AMINO(IMINIO)METHYL]AMINO}-1-(CHLOROACETYL)BUTYL]-L-PROLINAMIDE'),
 48.1009 -"LLY": ('k', 'p', 'M', None, 'NZ-(DICARBOXYMETHYL)LYSINE'),
 48.1010 -"DHI": ('h', 'p', 'M', None, 'D-HISTIDINE'),
 48.1011 -"RON": ('x', 'p', None, None, 'NORVALINE'),
 48.1012 -"PTH": ('y', 'p', 'M', None, 'CE1-METHYLENE-HYDROXY-PHOSPHOTYROSINE'),
 48.1013 -"T32": ('t', 'd', 'M', None, "6'-ALPHA-METHYL CARBOCYCLIC THYMIDINE 5'-MONOPHOSPHATE"),
 48.1014 -"LLP": ('k', 'p', 'M', None, '2-LYSINE(3-HYDROXY-2-METHYL-5-PHOSPHONOOXYMETHYL-PYRIDIN-4-YLMETHANE)'),
 48.1015 -"MNU": ('u', 'r', 'M', None, '"(2R,4S)-1-[(4R)-3,4-DIHYDROXYTETRAHYDROFURAN-2-YL]-5-[(METHYLAMINO)METHYL]-1,2,3,4-TETRAHYDROPYRIMIDINE-2,4-DIOL-5\'-MONOPHOSPHATE"'),
 48.1016 -"PTA": ('x', 'p', None, None, '"[(1-AMINO-3-METHYL-BUTYL)-HYDROXY-PHOSPHINOYL]-ACETIC'),
 48.1017 -"TZO": ('x', 'p', None, None, '"1,3-THIAZOLE-4-CARBOXYLIC'),
 48.1018 -"DBZ": ('a', 'p', 'M', None, '3-(BENZOYLAMINO)-L-ALANINE'),
 48.1019 -"MNL": ('l', 'p', 'M', None, '4,N-DIMETHYLNORLEUCINE'),
 48.1020 -"APN": ('a', 'd', 'M', None, '2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-ADENINE'),
 48.1021 -"OCY": ('c', 'p', 'M', None, 'HYDROXYETHYLCYSTEINE'),
 48.1022 -"BCS": ('c', 'p', 'M', None, 'BENZYLCYSTEINE'),
 48.1023 -"OSE": ('s', 'p', 'M', None, 'O-SULFO-L-SERINE'),
 48.1024 -"3TY": ('x', 'p', None, None, '3-[(3E)-3-(BENZYLHYDRAZONO)-4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIEN-1-YL]-L-ALANINE'),
 48.1025 -"SCS": ('c', 'p', 'M', None, '3-(ETHYLDISULFANYL)-L-ALANINE'),
 48.1026 -"TYY": ('y', 'p', 'M', None, '3-(4-HYDROXY-3-IMINO-6-OXO-CYCLOHEXA-1,4-DIENYL)-ALANINE'),
 48.1027 -"0A5": ('n', 'p', 'M', None, 'N~2~-PROPANOYL-L-ASPARAGINE'),
 48.1028 -"PBT": ('n', 'd', None, None, '"[3-HYDROXY-5-(5-METHYL-2,4-DIOXOTETRAHYDRO-1(2H)-PYRIMIDINYL)TETRAHYDRO-2-FURANYL]METHYL'),
 48.1029 -"CWR": ('s', 'p', 'M', None, '"(4-METHYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL)ACETIC'),
 48.1030 -"TRO": ('w', 'p', 'M', None, '2-HYDROXY-TRYPTOPHAN'),
 48.1031 -"TRN": ('w', 'p', 'M', None, 'NZ2-TRYPTOPHAN'),
 48.1032 -"PYO": ('u', 'r', 'M', None, '"1-(BETA-D-RIBOFURANOSYL)-PYRIMIDIN-2-ONE-5\'-PHOSPHATE"'),
 48.1033 -"ZGU": ('g', 'r', 'M', None, '"(S)-1\'-(2\',3\'-DIHYDROXYPROPYL)-GUANINE"'),
 48.1034 -"TRG": ('k', 'p', 'M', None, '"L-(N,N'),
 48.1035 -"TRF": ('w', 'p', 'M', None, 'N1-FORMYL-TRYPTOPHAN'),
 48.1036 -"R1F": ('c', 'p', 'M', None, '3-{[(2,2,5,5-TETRAMETHYL-1-OXO-4-PHENYL-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-D-ALANINE'),
 48.1037 -"M5M": ('c', 'r', 'M', None, '"2\'-(N-ACETAMIDE)-CYTIDINE-5\'-MONOPHOSPHATE"'),
 48.1038 -"PBF": ('f', 'p', 'M', None, 'PARA-(BENZOYL)-PHENYLALANINE'),
 48.1039 -"R1B": ('c', 'p', 'M', None, '3-{[(2,2,4,5,5-PENTAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-L-ALANINE'),
 48.1040 -"DHA": ('a', 'p', 'M', None, '"2-AMINO-ACRYLIC'),
 48.1041 -"YCM": ('c', 'p', 'M', None, 'S-(2-AMINO-2-OXOETHYL)-L-CYSTEINE'),
 48.1042 -"R1A": ('c', 'p', 'M', None, '3-{[(2,2,5,5-TETRAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-D-ALANINE'),
 48.1043 -"TRQ": ('w', 'p', 'M', None, '"2-AMINO-3-(6,7-DIOXO-6,7-DIHYDRO-1H-INDOL-3-YL)-PROPIONIC'),
 48.1044 -"TRP": ('W', 'p', None, None, 'TRYPTOPHAN'),
 48.1045 -"T23": ('n', 'r', None, None, '"2\'-O-METHYL-3\'-METHYL-3\'-DEOXY-ARABINOFURANOSYL-THYMINE-5\'-PHOSPHATE"'),
 48.1046 -"TRW": ('w', 'p', 'M', None, '"TRW3-(2-AMINO-3-HYDROXY-PROPYL)-6-(N\'-CYCLOHEXYL-HYDRAZINO)OCTAHYDRO-INDOL-7-OL"'),
 48.1047 -"4FW": ('w', 'p', 'M', None, '4-FLUOROTRYPTOPHANE'),
 48.1048 -"IVA": ('x', 'p', None, None, '"ISOVALERIC'),
 48.1049 -"VOL": ('x', 'p', None, None, 'L-VALINOL'),
 48.1050 -"6HT": ('t', 'd', 'M', None, '"1\',5\'-ANHYDRO-2\',3\'-DIDEOXY-2\'-(THYMIN-1-YL)-6\'-O-PHOSPHORYL-D-ARABINO-HEXITOL"'),
 48.1051 -"6HG": ('g', 'd', 'M', None, '"1\',5\'-ANHYDRO-2\',3\'-DIDEOXY-2\'-(GUANIN-9-YL)-6\'-O-PHOSPHORYL-D-ARABINO-HEXITOL"'),
 48.1052 -"MPJ": ('x', 'p', None, None, '"(1-AMINO-3-METHYLSULFANYL-PROPYL)-PHOSPHINIC'),
 48.1053 -"C43": ('c', 'r', 'M', None, '"2\'-O-METHYOXYETHYL-CYTIDINE-5\'-MONOPHOSPHATE"'),
 48.1054 -"MPH": ('x', 'p', None, None, '"(1-AMINO-3-METHYLSULFANYL-PROPYL)-PHOSPHONIC'),
 48.1055 -"AVN": ('x', 'p', None, None, '"(2S)-AMINO[(5S)-3-CHLORO-4,5-DIHYDROISOXAZOL-5-YL]ACETIC'),
 48.1056 -"M3L": ('k', 'p', 'M', None, 'N-TRIMETHYLLYSINE'),
 48.1057 -"C5C": ('c', 'p', 'M', None, '"S-CYCLOPENTYL'),
 48.1058 -"HTR": ('w', 'p', 'M', None, 'BETA-HYDROXYTRYPTOPHANE'),
 48.1059 -"MPQ": ('g', 'p', 'M', None, 'N-METHYL-ALPHA-PHENYL-GLYCINE'),
 48.1060 -"ESC": ('m', 'p', 'M', None, '"2-AMINO-4-ETHYL'),
 48.1061 -"3ME": ('n', 'd', None, None, '"PHOSPHORIC'),
 48.1062 -"TYS": ('y', 'p', 'M', None, 'O-SULFO-L-TYROSINE'),
 48.1063 -"6HA": ('a', 'd', 'M', None, '"1\',5\'-ANHYDRO-2\',3\'-DIDEOXY-2\'-(ADENIN-9-YL)-6\'-O-PHOSPHORYL-D-ARABINO-HEXITOL"'),
 48.1064 -"KCX": ('k', 'p', 'M', None, '"LYSINE'),
 48.1065 -"6HC": ('c', 'd', 'M', None, '"1\',5\'-ANHYDRO-2\',3\'-DIDEOXY-2\'-(CYTOSIN-1-YL)-6\'-O-PHOSPHORYL-D-ARABINO-HEXITOL"'),
 48.1066 -"AVC": ('a', 'r', 'M', None, '"ADENOSINE-5\'-MONOPHOSPHATE-2\',3\'-VANADATE"'),
 48.1067 -"GTP": ('g', 'r', 'M', None, '"GUANOSINE-5\'-TRIPHOSPHATE"'),
 48.1068 -"ACB": ('d', 'p', 'M', None, '"3-METHYL-ASPARTIC'),
 48.1069 -"2SI": ('x', 'p', None, None, '"2-O-SULFO-A-L-IDURONIC'),
 48.1070 -"NEM": ('h', 'p', 'M', None, '"NE2-METHYLATED'),
 48.1071 -"N7P": ('p', 'p', 'M', None, '1-ACETYL-D-PROLINE'),
 48.1072 -"SUB": ('x', 'p', None, None, '"3-PHENYL-2-{[4-(TOLUENE-4-SULFONYL)-THIOMORPHOLINE-3-CARBONYL]-AMINO}-PROPIONIC'),
 48.1073 -"2AG": ('g', 'p', 'M', None, '"(2S)-2-AMINOPENT-4-ENOIC'),
 48.1074 -"SUN": ('s', 'p', 'M', None, 'O-[(R)-(DIMETHYLAMINO)(ETHOXY)PHOSPHORYL]-L-SERINE'),
 48.1075 -"SUI": ('x', 'p', None, None, '"(3-AMINO-2,5-DIOXO-1-PYRROLIDINYL)ACETIC'),
 48.1076 -"ACL": ('r', 'p', 'M', None, 'DEOXY-CHLOROMETHYL-ARGININE'),
 48.1077 -"SUR": ('u', 'r', 'M', None, '"1-(BETA-D-RIBOFURANOSYL)-2-THIO-URACIL-5\'-PHOSPHATE"'),
 48.1078 -"BVP": ('n', 'd', None, None, '"(E)-5-(2-BROMOVINYL)-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'),
 48.1079 -"HFA": ('x', 'p', None, None, '"ALPHA-HYDROXY-BETA-PHENYL-PROPIONIC'),
 48.1080 -"LET": ('x', 'p', None, None, '(Z)-N^6-{3-CARBOXY-1-[(4-CARBOXY-2-OXOBUTOXY)METHYL]PROPYLIDENE}-L-LYSINE'),
 48.1081 -"0A8": ('c', 'p', 'M', None, 'S-[(2-CHLOROETHYL)CARBAMOYL]-L-CYSTEINE'),
 48.1082 -"D4P": ('x', 'p', None, None, '"(2S)-AMINO(4-HYDROXYPHENYL)ACETIC'),
 48.1083 -"2OT": ('t', 'd', 'M', None, "2'-O-[2-(N,N-DIMETHYLAMINOOXY)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"),
 48.1084 -"NEP": ('h', 'p', 'M', None, 'N1-PHOSPHONOHISTIDINE'),
 48.1085 -"PMT": ('c', 'r', 'M', None, '"PHOSPHORIC'),
 48.1086 -"2DT": ('t', 'd', 'M', None, '"3\'-DEOXYTHYMIDINE-5\'-MONOPHOSPHATE"'),
 48.1087 -"MLY": ('k', 'p', 'M', None, 'N-DIMETHYL-LYSINE'),
 48.1088 -"AD2": ('a', 'd', 'M', None, '"2\'-DEOXY-ADENOSINE-3\'-5\'-DIPHOSPHATE"'),
 48.1089 -"CGA": ('e', 'p', 'M', None, '"CARBOXYMETHYLATED'),
 48.1090 -"NBQ": ('y', 'p', 'M', None, '2-HYDROXY-5-({1-[(2-NAPHTHYLOXY)METHYL]-3-OXOPROP-1-ENYL}AMINO)TYROSINE'),
 48.1091 -"ODS": ('x', 'p', None, None, '"4-METHYLPIPERAZIN-1-YL'),
 48.1092 -"7GU": ('g', 'd', 'M', None, '"7-DEAZA-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'),
 48.1093 -"SAH": ('c', 'p', 'M', None, 'S-ADENOSYL-L-HOMOCYSTEINE'),
 48.1094 -"2DF": ('n', 'd', None, None, '"N-(2-DEOXY-BETA-D-ERYTHO-PENTOFURANOSYL-5-PHOSPHATE)'),
 48.1095 -"2DA": ('a', 'd', 'M', None, '"2\',3\'-DIDEOXYADENOSINE-5\'-MONOPHOSPHATE"'),
 48.1096 -"PCC": ('p', 'p', 'M', None, '5-OXOPROLINE'),
 48.1097 -"ODA": ('x', 'p', None, None, '"9-AMINO-6,10-DIOXO-OCTAHYDRO-PYRIDAZINO[1,2-A][1,2]DIAZEPINE-1-CARBOXYLIC'),
 48.1098 -"2DO": ('x', 'p', None, None, '(2S)-2-AMINOHEXANE-1,1-DIOL'),
 48.1099 -"CGU": ('e', 'p', 'M', None, '"GAMMA-CARBOXY-GLUTAMIC'),
 48.1100 -"L2A": ('x', 'p', None, None, '"(2S,5S,8S,11S,15E,20S)-20-AMINO-2-(CARBOXYMETHYL)-11,20-DIMETHYL-5,8-BIS(2-METHYLPROPYL)-3,6,9,21-TETRAOXO-1,4,7,10-TETRAAZACYCLOHENICOS-15-ENE-11-CARBOXYLIC'),
 48.1101 -"DNG": ('l', 'p', 'M', None, 'N-FORMYL-D-NORLEUCINE'),
 48.1102 -"UMA": ('a', 'p', 'M', None, '"URIDINE-5\'-DIPHOSPHATE-N-ACETYLMURAMOYL-L-ALANINE"'),
 48.1103 -"PM3": ('f', 'p', 'M', None, '"2-AMINO-3-(4-PHOSPHONOMETHYL-PHENYL)-PROPIONIC'),
 48.1104 -"DNM": ('l', 'p', 'M', None, 'N-METHYL-D-NORLEUCINE'),
 48.1105 -"DNL": ('k', 'p', 'M', None, '6-AMINO-HEXANAL'),
 48.1106 -"GMA": ('e', 'p', 'M', None, '"4-AMIDO-4-CARBAMOYL-BUTYRIC'),
 48.1107 -"1LU": ('l', 'p', 'M', None, '"4-METHYL-PENTANOIC'),
 48.1108 -"CDV": ('x', 'p', None, None, '"3-METHYL-2-UREIDO-BUTYRIC'),
 48.1109 -"ADX": ('n', 'r', None, None, '"ADENOSINE-5\'-PHOSPHOSULFATE"'),
 48.1110 -"LKC": ('n', 'd', None, None, '4-AMINO-1-[(1S,3R,4R,7S)-7-HYDROXY-1-(HYDROXYMETHYL)-2,5-DIOXABICYCLO[2.2.1]HEPT-3-YL]-5-METHYLPYRIMIDIN-2(1H)-ONE'),
 48.1111 -"ADD": ('x', 'p', None, None, '"2,6,8-TRIMETHYL-3-AMINO-9-BENZYL-9-METHOXYNONANOIC'),
 48.1112 -"CUC": ('x', 'p', None, None, '"CYCLOHEXYLGLYCYL'),
 48.1113 -"DNP": ('a', 'p', 'M', None, '3-AMINO-ALANINE'),
 48.1114 -"DNS": ('k', 'p', 'M', None, 'N~6~-{[5-(DIMETHYLAMINO)-1-NAPHTHYL]SULFONYL}-L-LYSINE'),
 48.1115 -"DNR": ('c', 'd', 'M', None, "2'-DEOXY-N3-PROTONATED CYTIDINE-5'-MONOPHOSPHATE"),
 48.1116 -"CG1": ('g', 'r', 'M', None, '"5\'-O-[(R)-HYDROXY(METHOXY)PHOSPHORYL]GUANOSINE"'),
 48.1117 -"Z": ('c', 'd', 'M', None, 'ZEBULARINE'),
 48.1118 -"GMU": ('n', 'd', None, None, "2'-O-[(2-GUANIDINIUM)ETHYL]-5-METHYLURIDINE 5'-MONOPHOSPHATE"),
 48.1119 -"C99": ('x', 'p', None, None, '"{(2R)-2-[(1S,2R)-1-AMINO-2-HYDROXYPROPYL]-2-HYDROXY-4,5-DIOXOIMIDAZOLIDIN-1-YL}ACETIC'),
 48.1120 -"GDP": ('g', 'r', 'M', None, '"GUANOSINE-5\'-DIPHOSPHATE"'),
 48.1121 -"MED": ('m', 'p', 'M', None, 'D-METHIONINE'),
 48.1122 -"GDR": ('g', 'd', 'M', None, '"GUANOSINE-5\'-DIPHOSPHATE-RHAMNOSE"'),
 48.1123 -"MEA": ('f', 'p', 'M', None, 'N-METHYLPHENYLALANINE'),
 48.1124 -"LEF": ('l', 'p', 'M', None, '(4S)-5-FLUORO-L-LEUCINE'),
 48.1125 -"LEH": ('l', 'p', 'M', None, 'N-[12-(1H-IMIDAZOL-1-YL)DODECANOYL]-L-LEUCINE'),
 48.1126 -"CHG": ('x', 'p', None, None, '"CYCLOHEXYL-GLYCINE"'),
 48.1127 -"MEN": ('n', 'p', 'M', None, '"N-METHYL'),
 48.1128 -"G42": ('g', 'd', 'M', None, '"8-OXO-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'),
 48.1129 -"5CG": ('g', 'd', 'M', None, '"5\'-CHLORO-5\'-DEOXY-GUANOSINE"'),
 48.1130 -"T31": ('u', 'r', 'M', None, "THYMIDINE 5'-MONOTHIOPHOSPHATE"),
 48.1131 -"MEU": ('g', 'p', 'M', None, 'O-METHYL-GLYCINE'),
 48.1132 -"MET": ('M', 'p', None, None, 'METHIONINE'),
 48.1133 -"U34": ('u', 'r', 'M', None, "URIDINE 5'-MONOMETHYLPHOSPHATE"),
 48.1134 -"MEQ": ('q', 'p', 'M', None, 'N5-METHYLGLUTAMINE'),
 48.1135 -"MEP": ('u', 'r', 'M', None, '"PHOSPHORIC'),
 48.1136 -"B1P": ('n', 'd', None, None, '2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSE'),
 48.1137 -"IG": ('g', 'r', 'M', None, '"ISOGUANOSINE-5\'-MONOPHOSPHATE"'),
 48.1138 -"T2S": ('n', 'r', None, None, '"THYMIDINE-5\'-DITHIOPHOSPHORATE"'),
 48.1139 -"ZHP": ('n', 'r', None, None, '"(S)-1\'-(2\',3\'-DIHYDROXYPROPYL)-HYDROXYPYRIDONE"'),
 48.1140 -"AZK": ('k', 'p', 'M', None, '"(S)-2-AMINO-6-AZIDOHEXANOIC'),
 48.1141 -"U31": ('u', 'r', 'M', None, "2'-O-3-AMINOPROPYL 2'-DEOXYURIDINE-5'-MONOPHOSPHATE"),
 48.1142 -"LTR": ('w', 'p', 'M', None, 'L-TRYPTOPHAN'),
 48.1143 -"MSU": ('x', 'p', None, None, '"SUCCINIC'),
 48.1144 -"1PA": ('f', 'p', 'M', None, '"PHENYLMETHYLACETIC'),
 48.1145 -"MSP": ('a', 'p', 'M', None, '"5\'-O-[(L-METHIONYL)-SULPHAMOYL]ADENOSINE"'),
 48.1146 -"MSO": ('m', 'p', 'M', None, '"SELENOMETHIONINE'),
 48.1147 -"AZY": ('y', 'p', 'M', None, '3-AZIDO-L-TYROSINE'),
 48.1148 -"ENA": ('n', 'r', None, None, 'ETHENO-NAD'),
 48.1149 -"MSL": ('m', 'p', 'M', None, '"(2S)-2-AMINO-4-(METHYLSULFONIMIDOYL)BUTANOIC'),
 48.1150 -"PF5": ('f', 'p', 'M', None, '2,3,4,5,6-PENTAFLUORO-L-PHENYLALANINE'),
 48.1151 -"1PR": ('n', 'r', None, None, '"PHOSPHORIC'),
 48.1152 -"3AR": ('x', 'p', None, None, 'N-OMEGA-PROPYL-L-ARGININE'),
 48.1153 -"MSE": ('m', 'p', 'M', None, 'SELENOMETHIONINE'),
 48.1154 -"AZS": ('s', 'p', 'M', None, 'O-DIAZOACETYL-L-SERINE'),
 48.1155 -"MSA": ('g', 'p', 'M', None, '"(2-S-METHYL)'),
 48.1156 -"R2P": ('x', 'p', None, None, '"(2R)-2-AMINO-3-(4-HYDROXY-1,2,5-THIADIAZOL-3-YL)PROPANOIC'),
 48.1157 -"CAR": ('c', 'd', 'M', None, "CYTOSINE ARABINOSE-5'-PHOSPHATE"),
 48.1158 -"DAB": ('a', 'p', 'M', None, '"2,4-DIAMINOBUTYRIC'),
 48.1159 -"OTY": ('y', 'p', 'M', None, '"2-AMINO-3-(4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIENYL)PROPANOIC'),
 48.1160 -"DAL": ('a', 'p', 'M', None, 'D-ALANINE'),
 48.1161 -"DAM": ('x', 'p', None, None, 'N-METHYL-ALPHA-BETA-DEHYDROALANINE'),
 48.1162 -"TT": ('n', 'd', None, None, '"[(1R,3R,4S,9R,10S,12R,15AS,15BR,18BR,18CS)-10-HYDROXY-15A,15B-DIMETHYL-13,15,16,18-TETRAOXOHEXADECAHYDRO-8H-9,12-EPOXY-1,4-METHANO-2,5,7-TRIOXA-12A,14,17,18A-TETRAAZACYCLOHEXADECA[1,2,3,4-DEF]BIPHENYLEN-3-YL]METHYL DIHYDROGEN PHOSPHATE"'),
 48.1163 -"IOY": ('f', 'p', 'M', None, 'P-IODO-D-PHENYLALANINE'),
 48.1164 -"DAH": ('f', 'p', 'M', None, '3,4-DIHYDROXYPHENYLALANINE'),
 48.1165 -"8MG": ('g', 'd', 'M', None, '"8-METHYL-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'),
 48.1166 -"BZG": ('n', 'd', None, None, '6-(BENZYLOXY)-9-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-9H-PURIN-2-AMINE'),
 48.1167 -"ARM": ('r', 'p', 'M', None, 'DEOXY-METHYL-ARGININE'),
 48.1168 -"DAR": ('r', 'p', 'M', None, 'D-ARGININE'),
 48.1169 -"DAS": ('d', 'p', 'M', None, '"D-ASPARTIC'),
 48.1170 -"MCY": ('c', 'd', 'M', None, '"5-METHYL-2\'-DEOXYCYTIDINE"'),
 48.1171 -"SGB": ('s', 'p', 'M', None, 'O-[(S)-METHYL(1-METHYLETHOXY)PHOSPHORYL]-L-SERINE'),
 48.1172 -"YRR": ('n', 'd', None, None, '"3-HYDROXY-PYRROLIDIN-2-YLMETHYL-MONOPHOSPHATE'),
 48.1173 -"NF2": ('n', 'r', None, None, '(1S)-1,4-ANHYDRO-1-(2,4-DIFLUORO-5-METHYLPHENYL)-5-O-PHOSPHONO-D-RIBITOL'),
 48.1174 -"CNU": ('u', 'r', 'M', None, '"5-CYANO-URIDINE-5\'-MONOPHOSPHATE"'),
 48.1175 -"E": ('a', 'd', 'M', None, "N-((-)-(7S,8R,9S,10R)[7,8,9-TRIHYDROXY-7,8,9,10-TETRA HYDROBENZO[A]PYREN-10-YL])-2'-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"),
 48.1176 -"AA3": ('a', 'p', 'M', None, '"2-AMINOBUTYRIC'),
 48.1177 -"GVL": ('x', 'p', None, None, 'O-[(R)-{[(3R)-4-AMINO-3-HYDROXY-2,2-DIMETHYL-4-OXOBUTYL]OXY}(HYDROXY)PHOSPHORYL]-L-SERINE'),
 48.1178 -"TNR": ('s', 'p', 'M', None, 'O-(2-ACETAMIDO-2-DEOXY-ALPHA-D-GALACTOPYRANOSYL)-L-SERINE'),
 48.1179 -"HOL": ('n', 'd', None, None, '"CHOLEST-5-EN-3-YL'),
 48.1180 -"PTM": ('y', 'p', 'M', None, 'ALPHA-METHYL-O-PHOSPHOTYROSINE'),
 48.1181 -"AS": ('a', 'd', 'M', None, "2-DEOXY-ADENOSINE -5'-THIO-MONOPHOSPHATE"),
 48.1182 -"U": ('U', 'r', None, None, '"URIDINE-5\'-MONOPHOSPHATE"'),
 48.1183 -"ESB": ('y', 'p', 'M', None, '3-[(3E)-3-(ETHYLIMINO)-4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIEN-1-YL]-L-ALANINE'),
 48.1184 -"DA2": ('x', 'p', None, None, 'NG,NG-DIMETHYL-L-ARGININE'),
 48.1185 -"TNB": ('c', 'p', 'M', None, 'S-(2,3,6-TRINITROPHENYL)CYSTEINE'),
 48.1186 -"RMP": ('a', 'd', 'M', None, '"2\'-DEOXY-ADENOSINE-5\'-RP-MONOMETHYLPHOSPHONATE"'),
 48.1187 -"SME": ('m', 'p', 'M', None, '"METHIONINE'),
 48.1188 -"NDF": ('f', 'p', 'M', None, 'N-(CARBOXYCARBONYL)-D-PHENYLALANINE'),
 48.1189 -"BOE": ('t', 'd', 'M', None, "2'-O-[2-(BENZYLOXY)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"),
 48.1190 -"LGP": ('g', 'd', 'M', None, '"N9-1-HYDROXY-PROP-2-OXYMETHYL-GUANINE-3\'-MONOPHOSPHATE"'),
 48.1191 -"SCH": ('c', 'p', 'M', None, 'S-METHYL-THIO-CYSTEINE'),
 48.1192 -"T2T": ('n', 'd', None, None, '[(2S,3S,5R)-3-[(2S)-3-({[(2R,3S,4R,5R)-3-HYDROXY-4-METHOXY-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)TETRAHYDROFURAN-2-YL]METHYL}AMINO)-2-METHYL-3-OXOPROPYL]-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)TETRAHYDROFURAN-2-YL]METHYL DIHYDROGEN PHOSPHATE'),
 48.1193 -"PFF": ('f', 'p', 'M', None, '4-FLUORO-L-PHENYLALANINE'),
 48.1194 -"PCE": ('x', 'p', None, None, '"2-(3-AMINO-4-CYCLOHEXYL-2-HYDROXY-BUTYL)-PENT-4-YNOIC'),
 48.1195 -"DFO": ('x', 'p', None, None, '2,2-DIFLUORO-3-HYDROSTATINE'),
 48.1196 -"DXN": ('n', 'd', None, None, '(1R,3S,4R)-4-(PHOSPHOOXYMETHYL)-CYCLOPENTANE-1,3-DIOL'),
 48.1197 -"KGC": ('k', 'p', 'M', None, 'N~6~-[(2R)-2-CARBOXY-5-OXOTETRAHYDROFURAN-2-YL]-L-LYSINE'),
 48.1198 -"US1": ('n', 'd', None, None, "2'-DEOXY-3'-THIOURIDINE 5'-(DIHYDROGEN PHOSPHATE)"),
 48.1199 -"HHK": ('x', 'p', None, None, '"(2S)-2,8-DIAMINOOCTANOIC'),
 48.1200 -"DXD": ('n', 'd', None, None, '(1S,3S,4R)-4-(PHOSPHOOXYMETHYL)-CYCLOPENTANE-1,3-DIOL'),
 48.1201 -"ZCY": ('c', 'r', 'M', None, '"(S)-1\'-(2\',3\'-DIHYDROXYPROPYL)-CYTOSINE"'),
 48.1202 -"IEL": ('k', 'p', 'M', None, 'N~6~-[(1Z)-ETHANIMIDOYL]-L-LYSINE'),
 48.1203 -}
    49.1 --- a/lib/allpy_data/__init__.py	Mon Jan 24 21:40:10 2011 +0300
    49.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.3 @@ -1,3 +0,0 @@
    49.4 -""" 
    49.5 -Module that contains various data relevant to biological sequences.
    49.6 -"""
    50.1 --- a/lib/allpy_data/amino_acids.py	Mon Jan 24 21:40:10 2011 +0300
    50.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    50.3 @@ -1,8 +0,0 @@
    50.4 -
    50.5 -amino_acids = [
    50.6 -    'A', 'C', 'D', 'E', 'F', 
    50.7 -    'G', 'H', 'I', 'K', 'L', 
    50.8 -    'M', 'N', 'P', 'Q', 'R', 
    50.9 -    'S', 'T', 'V', 'W', 'Y',
   50.10 -]
   50.11 -
    51.1 --- a/lib/allpy_data/blossum62.py	Mon Jan 24 21:40:10 2011 +0300
    51.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    51.3 @@ -1,28 +0,0 @@
    51.4 -matrix = {
    51.5 -"A": {"A":  4, "R": -1, "N": -2, "D": -2, "C":  0, "Q": -1, "E": -1, "G":  0, "H": -2, "I": -1, "L": -1, "K": -1, "M": -1, "F": -2, "P": -1, "S":  1, "T":  0, "W": -3, "Y": -2, "V":  0, "B": -2, "Z": -1, "X":  0, "*": -4},
    51.6 -"R": {"A": -1, "R":  5, "N":  0, "D": -2, "C": -3, "Q":  1, "E":  0, "G": -2, "H":  0, "I": -3, "L": -2, "K":  2, "M": -1, "F": -3, "P": -2, "S": -1, "T": -1, "W": -3, "Y": -2, "V": -3, "B": -1, "Z":  0, "X": -1, "*": -4},
    51.7 -"N": {"A": -2, "R":  0, "N":  6, "D":  1, "C": -3, "Q":  0, "E":  0, "G":  0, "H":  1, "I": -3, "L": -3, "K":  0, "M": -2, "F": -3, "P": -2, "S":  1, "T":  0, "W": -4, "Y": -2, "V": -3, "B":  3, "Z":  0, "X": -1, "*": -4},
    51.8 -"D": {"A": -2, "R": -2, "N":  1, "D":  6, "C": -3, "Q":  0, "E":  2, "G": -1, "H": -1, "I": -3, "L": -4, "K": -1, "M": -3, "F": -3, "P": -1, "S":  0, "T": -1, "W": -4, "Y": -3, "V": -3, "B":  4, "Z":  1, "X": -1, "*": -4},
    51.9 -"C": {"A":  0, "R": -3, "N": -3, "D": -3, "C":  9, "Q": -3, "E": -4, "G": -3, "H": -3, "I": -1, "L": -1, "K": -3, "M": -1, "F": -2, "P": -3, "S": -1, "T": -1, "W": -2, "Y": -2, "V": -1, "B": -3, "Z": -3, "X": -2, "*": -4},
   51.10 -"Q": {"A": -1, "R":  1, "N":  0, "D":  0, "C": -3, "Q":  5, "E":  2, "G": -2, "H":  0, "I": -3, "L": -2, "K":  1, "M":  0, "F": -3, "P": -1, "S":  0, "T": -1, "W": -2, "Y": -1, "V": -2, "B":  0, "Z":  3, "X": -1, "*": -4},
   51.11 -"E": {"A": -1, "R":  0, "N":  0, "D":  2, "C": -4, "Q":  2, "E":  5, "G": -2, "H":  0, "I": -3, "L": -3, "K":  1, "M": -2, "F": -3, "P": -1, "S":  0, "T": -1, "W": -3, "Y": -2, "V": -2, "B":  1, "Z":  4, "X": -1, "*": -4},
   51.12 -"G": {"A":  0, "R": -2, "N":  0, "D": -1, "C": -3, "Q": -2, "E": -2, "G":  6, "H": -2, "I": -4, "L": -4, "K": -2, "M": -3, "F": -3, "P": -2, "S":  0, "T": -2, "W": -2, "Y": -3, "V": -3, "B": -1, "Z": -2, "X": -1, "*": -4},
   51.13 -"H": {"A": -2, "R":  0, "N":  1, "D": -1, "C": -3, "Q":  0, "E":  0, "G": -2, "H":  8, "I": -3, "L": -3, "K": -1, "M": -2, "F": -1, "P": -2, "S": -1, "T": -2, "W": -2, "Y":  2, "V": -3, "B":  0, "Z":  0, "X": -1, "*": -4},
   51.14 -"I": {"A": -1, "R": -3, "N": -3, "D": -3, "C": -1, "Q": -3, "E": -3, "G": -4, "H": -3, "I":  4, "L":  2, "K": -3, "M":  1, "F":  0, "P": -3, "S": -2, "T": -1, "W": -3, "Y": -1, "V":  3, "B": -3, "Z": -3, "X": -1, "*": -4},
   51.15 -"L": {"A": -1, "R": -2, "N": -3, "D": -4, "C": -1, "Q": -2, "E": -3, "G": -4, "H": -3, "I":  2, "L":  4, "K": -2, "M":  2, "F":  0, "P": -3, "S": -2, "T": -1, "W": -2, "Y": -1, "V":  1, "B": -4, "Z": -3, "X": -1, "*": -4},
   51.16 -"K": {"A": -1, "R":  2, "N":  0, "D": -1, "C": -3, "Q":  1, "E":  1, "G": -2, "H": -1, "I": -3, "L": -2, "K":  5, "M": -1, "F": -3, "P": -1, "S":  0, "T": -1, "W": -3, "Y": -2, "V": -2, "B":  0, "Z":  1, "X": -1, "*": -4},
   51.17 -"M": {"A": -1, "R": -1, "N": -2, "D": -3, "C": -1, "Q":  0, "E": -2, "G": -3, "H": -2, "I":  1, "L":  2, "K": -1, "M":  5, "F":  0, "P": -2, "S": -1, "T": -1, "W": -1, "Y": -1, "V":  1, "B": -3, "Z": -1, "X": -1, "*": -4},
   51.18 -"F": {"A": -2, "R": -3, "N": -3, "D": -3, "C": -2, "Q": -3, "E": -3, "G": -3, "H": -1, "I":  0, "L":  0, "K": -3, "M":  0, "F":  6, "P": -4, "S": -2, "T": -2, "W":  1, "Y":  3, "V": -1, "B": -3, "Z": -3, "X": -1, "*": -4},
   51.19 -"P": {"A": -1, "R": -2, "N": -2, "D": -1, "C": -3, "Q": -1, "E": -1, "G": -2, "H": -2, "I": -3, "L": -3, "K": -1, "M": -2, "F": -4, "P":  7, "S": -1, "T": -1, "W": -4, "Y": -3, "V": -2, "B": -2, "Z": -1, "X": -2, "*": -4},
   51.20 -"S": {"A":  1, "R": -1, "N":  1, "D":  0, "C": -1, "Q":  0, "E":  0, "G":  0, "H": -1, "I": -2, "L": -2, "K":  0, "M": -1, "F": -2, "P": -1, "S":  4, "T":  1, "W": -3, "Y": -2, "V": -2, "B":  0, "Z":  0, "X":  0, "*": -4},
   51.21 -"T": {"A":  0, "R": -1, "N":  0, "D": -1, "C": -1, "Q": -1, "E": -1, "G": -2, "H": -2, "I": -1, "L": -1, "K": -1, "M": -1, "F": -2, "P": -1, "S":  1, "T":  5, "W": -2, "Y": -2, "V":  0, "B": -1, "Z": -1, "X":  0, "*": -4},
   51.22 -"W": {"A": -3, "R": -3, "N": -4, "D": -4, "C": -2, "Q": -2, "E": -3, "G": -2, "H": -2, "I": -3, "L": -2, "K": -3, "M": -1, "F":  1, "P": -4, "S": -3, "T": -2, "W": 11, "Y":  2, "V": -3, "B": -4, "Z": -3, "X": -2, "*": -4},
   51.23 -"Y": {"A": -2, "R": -2, "N": -2, "D": -3, "C": -2, "Q": -1, "E": -2, "G": -3, "H":  2, "I": -1, "L": -1, "K": -2, "M": -1, "F":  3, "P": -3, "S": -2, "T": -2, "W":  2, "Y":  7, "V": -1, "B": -3, "Z": -2, "X": -1, "*": -4},
   51.24 -"V": {"A":  0, "R": -3, "N": -3, "D": -3, "C": -1, "Q": -2, "E": -2, "G": -3, "H": -3, "I":  3, "L":  1, "K": -2, "M":  1, "F": -1, "P": -2, "S": -2, "T":  0, "W": -3, "Y": -1, "V":  4, "B": -3, "Z": -2, "X": -1, "*": -4},
   51.25 -"B": {"A": -2, "R": -1, "N":  3, "D":  4, "C": -3, "Q":  0, "E":  1, "G": -1, "H":  0, "I": -3, "L": -4, "K":  0, "M": -3, "F": -3, "P": -2, "S":  0, "T": -1, "W": -4, "Y": -3, "V": -3, "B":  4, "Z":  0, "X": -1, "*": -4},
   51.26 -"Z": {"A": -1, "R":  0, "N":  0, "D":  1, "C": -3, "Q":  3, "E":  4, "G": -2, "H":  0, "I": -3, "L": -3, "K":  1, "M": -1, "F": -3, "P": -1, "S":  0, "T": -1, "W": -3, "Y": -2, "V": -2, "B":  0, "Z":  4, "X": -1, "*": -4},
   51.27 -"X": {"A":  0, "R": -1, "N": -1, "D": -1, "C": -2, "Q": -1, "E": -1, "G": -1, "H": -1, "I": -1, "L": -1, "K": -1, "M": -1, "F": -1, "P": -2, "S":  0, "T":  0, "W": -2, "Y": -1, "V": -1, "B": -1, "Z": -1, "X": -1, "*": -4},
   51.28 -"*": {"A": -4, "R": -4, "N": -4, "D": -4, "C": -4, "Q": -4, "E": -4, "G": -4, "H": -4, "I": -4, "L": -4, "K": -4, "M": -4, "F": -4, "P": -4, "S": -4, "T": -4, "W": -4, "Y": -4, "V": -4, "B": -4, "Z": -4, "X": -4, "*":  1}
   51.29 -}
   51.30 -
   51.31 -gaps = (-8, -4, -2, -1)
    52.1 --- a/lib/allpy_pdb.py	Mon Jan 24 21:40:10 2011 +0300
    52.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    52.3 @@ -1,67 +0,0 @@
    52.4 -
    52.5 -import re
    52.6 -from Bio.PDB import PDBParser
    52.7 -
    52.8 -""" Functions to get pdb information from fasta id
    52.9 -and to generate fasta id from pdb information
   52.10 -
   52.11 -pdb information: code, chain, model
   52.12 -
   52.13 -TODO: same for local pdb files
   52.14 -"""
   52.15 -
   52.16 -# for pdb-codes
   52.17 -re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I)
   52.18 -
   52.19 -#~ # for files
   52.20 -#~ re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$")
   52.21 -
   52.22 -def std_id(pdb_id, pdb_chain, pdb_model=None):
   52.23 -    if pdb_model:
   52.24 -        return "%s_%s_%s" % \
   52.25 -        (pdb_id.lower().strip(), pdb_chain.upper().strip(), pdb_model)
   52.26 -    else:
   52.27 -        return "%s_%s" % \
   52.28 -        (pdb_id.lower().strip(), pdb_chain.upper().strip())
   52.29 -    
   52.30 -def pdb_id_parse(ID):
   52.31 -    match = re1.search(ID)
   52.32 -    if not match:
   52.33 -        return None
   52.34 -    d = match.groupdict()
   52.35 -    if 'chain' not in d or not d['chain']:
   52.36 -        d['chain'] = ' '
   52.37 -    if 'model' not in d or not d['model']:
   52.38 -        d['model'] = 0
   52.39 -    return d
   52.40 -    
   52.41 -    
   52.42 -def get_structure(file, name):
   52.43 -    return PDBParser().get_structure(name, file)
   52.44 -    
   52.45 -#~ def std_id_parse(ID):
   52.46 -    #~ """
   52.47 -    #~ Parse standart ID to pdb_code, chain and model
   52.48 -    #~ """
   52.49 -    #~ if '.ent' in ID.lower() or '.pdb' in ID.lower():
   52.50 -        #~ # it is file
   52.51 -        #~ parseO = self.re2.search(ID) # files
   52.52 -    #~ else:
   52.53 -        #~ parseO = self.re1.search(ID.lower()) # pdb codes
   52.54 -    #~ if not parseO:
   52.55 -        #~ return None
   52.56 -    #~ parse = parseO.groups()
   52.57 -    #~ if len(parse) < 2:
   52.58 -        #~ return None
   52.59 -    #~ code = parse[1]
   52.60 -    #~ chain = ''
   52.61 -    #~ model = None
   52.62 -    #~ if len(parse) >= 4:
   52.63 -        #~ chain = parse[3]
   52.64 -        #~ if chain:
   52.65 -            #~ chain = chain.upper()
   52.66 -        #~ if len(parse) >= 6:
   52.67 -            #~ if parse[5]:
   52.68 -                #~ model = parse[5]
   52.69 -    #~ return code, chain, model
   52.70 -    
    53.1 --- a/lib/block.py	Mon Jan 24 21:40:10 2011 +0300
    53.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    53.3 @@ -1,172 +0,0 @@
    53.4 -#!usr/bin/python
    53.5 -
    53.6 -import sys
    53.7 -
    53.8 -import project
    53.9 -import sequence
   53.10 -import monomer
   53.11 -import config
   53.12 -from graph import Graph
   53.13 -from Bio.PDB import Superimposer
   53.14 -from tempfile import NamedTemporaryFile
   53.15 -import os
   53.16 -from fasta import save_fasta
   53.17 -
   53.18 -class Block(object):
   53.19 -    """ Block of alignment
   53.20 -    
   53.21 -    Mandatory data:
   53.22 -    *   self.project -- project object, which the block belongs to
   53.23 -    *   self.sequences - set of sequence objects that contain monomers
   53.24 -        and/or gaps, that constitute the block
   53.25 -    *   self.positions -- sorted list of positions of the project.alignment that
   53.26 -        are included in the block
   53.27 -    
   53.28 -    Don't change self.sequences -- it may be a link to other block.sequences
   53.29 -    
   53.30 -    How to create a new block:
   53.31 -    >>> import project
   53.32 -    >>> import block
   53.33 -    >>> proj = project.Project(open("test.fasta"))
   53.34 -    >>> block1 = block.Block(proj)
   53.35 -    """
   53.36 -    
   53.37 -    def __init__(self, project, sequences=None, positions=None):
   53.38 -        """ Builds new block from project
   53.39 -        
   53.40 -        if sequences==None, all sequences are used
   53.41 -        if positions==None, all positions are used
   53.42 -        """
   53.43 -        if sequences == None:
   53.44 -            sequences = set(project.sequences) # copy
   53.45 -        if positions == None:
   53.46 -            positions = range(len(project))
   53.47 -        self.project = project
   53.48 -        self.sequences = sequences
   53.49 -        self.positions = positions
   53.50 -    
   53.51 -    def save_fasta(self, out_file, long_line=70, gap='-'):
   53.52 -        """ Saves alignment to given file in fasta-format 
   53.53 -        
   53.54 -        No changes in the names, descriptions or order of the sequences
   53.55 -        are made.
   53.56 -        """
   53.57 -        for sequence in self.sequences:
   53.58 -            alignment_monomers = self.project.alignment[sequence]
   53.59 -            block_monomers = [alignment_monomers[i] for i in self.positions]
   53.60 -            string = ''.join([m.type.code1 if m else '-' for m in block_monomers])
   53.61 -            save_fasta(out_file, string, sequence.name, sequence.description, long_line)
   53.62 -    
   53.63 -    def geometrical_cores(self, max_delta=config.delta, 
   53.64 -    timeout=config.timeout, minsize=config.minsize, 
   53.65 -    ac_new_atoms=config.ac_new_atoms,
   53.66 -    ac_count=config.ac_count):
   53.67 -        """ Returns length-sorted list of blocks, representing GCs
   53.68 -        
   53.69 -        max_delta -- threshold of distance spreading
   53.70 -        timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
   53.71 -        minsize -- min size of each core
   53.72 -        ac_new_atoms -- min part or new atoms in new alternative core
   53.73 -            current GC is compared with each of already selected GCs
   53.74 -            if difference is less then ac_new_atoms, current GC is skipped
   53.75 -            difference = part of new atoms in current core
   53.76 -        ac_count -- max number of cores (including main core)
   53.77 -            -1 means infinity
   53.78 -        If more than one pdb chain for some sequence provided, consider all of them
   53.79 -        cost is calculated as 1 / (delta + 1) 
   53.80 -            delta in [0, +inf) => cost in (0, 1]
   53.81 -        """
   53.82 -        nodes = self.positions
   53.83 -        lines = {}
   53.84 -        for i in self.positions:
   53.85 -            for j in self.positions:
   53.86 -                if i < j:
   53.87 -                    distances = []
   53.88 -                    for sequence in self.sequences:
   53.89 -                        for chain in sequence.pdb_chains:
   53.90 -                            m1 = self.project.alignment[sequence][i]
   53.91 -                            m2 = self.project.alignment[sequence][j]
   53.92 -                            if m1 and m2:
   53.93 -                                r1 = sequence.pdb_residues[chain][m1]
   53.94 -                                r2 = sequence.pdb_residues[chain][m2]
   53.95 -                                ca1 = r1['CA']
   53.96 -                                ca2 = r2['CA']
   53.97 -                                d = ca1 - ca2 # Bio.PDB feature
   53.98 -                                distances.append(d)
   53.99 -                    if len(distances) >= 2:
  53.100 -                        delta = max(distances) - min(distances)
  53.101 -                        if delta <= max_delta:
  53.102 -                            lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta)
  53.103 -        graph = Graph(nodes, lines)
  53.104 -        cliques = graph.cliques(timeout=timeout, minsize=minsize)
  53.105 -        GCs = []
  53.106 -        for clique in cliques:
  53.107 -            for GC in GCs:
  53.108 -                if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
  53.109 -                    break
  53.110 -            else:
  53.111 -                GCs.append(Block(self.project, self.sequences, clique))
  53.112 -                if ac_count != -1 and len(GCs) >= ac_count:
  53.113 -                    break
  53.114 -        return GCs
  53.115 -    
  53.116 -    def xstring(self, x='X', gap='-'):
  53.117 -        """ Returns string consisting of gap chars and chars x at self.positions
  53.118 -        
  53.119 -        Length of returning string = length of project
  53.120 -        """
  53.121 -        monomers = [False] * len(self.project)
  53.122 -        for i in self.positions:
  53.123 -            monomers[i] = True
  53.124 -        return ''.join([x if m else gap for m in monomers])
  53.125 -    
  53.126 -    def save_xstring(self, out_file, name, description='', x='X', gap='-', long_line=70):
  53.127 -        """ Save xstring and name in fasta format """
  53.128 -        save_fasta(out_file, self.xstring(x=x, gap=gap), name, description, long_line)
  53.129 -    
  53.130 -    def monomers(self, sequence):
  53.131 -        """ Iterates monomers of this sequence from this block """
  53.132 -        alignment_sequence = self.project.alignment[sequence]
  53.133 -        return (alignment_sequence[i] for i in self.positions)
  53.134 -    
  53.135 -    def ca_atoms(self, sequence, pdb_chain):
  53.136 -        """ Iterates Ca-atom of monomers of this sequence from this block  """
  53.137 -        return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers())
  53.138 -    
  53.139 -    def sequences_chains(self):
  53.140 -        """ Iterates pairs (sequence, chain) """
  53.141 -        for sequence in self.project.sequences:
  53.142 -            if sequence in self.sequences:
  53.143 -                for chain in sequence.pdb_chains:
  53.144 -                    yield (sequence, chain)
  53.145 -    
  53.146 -    def superimpose(self):
  53.147 -        """ Superimpose all pdb_chains in this block """
  53.148 -        sequences_chains = list(self.sequences_chains())
  53.149 -        if len(sequences_chains) >= 1:
  53.150 -            sup = Superimposer()
  53.151 -            fixed_sequence, fixed_chain = sequences_chains.pop()
  53.152 -            fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain)
  53.153 -            for sequence, chain in sequences_chains:
  53.154 -                moving_atoms =  self.ca_atoms(sequence, chain)
  53.155 -                sup.set_atoms(fixed_atoms, moving_atoms)
  53.156 -                # Apply rotation/translation to the moving atoms
  53.157 -                sup.apply(moving_atoms)
  53.158 -    
  53.159 -    def pdb_save(self, out_file):
  53.160 -        """ Save all sequences 
  53.161 -        
  53.162 -        Returns {(sequence, chain): CHAIN}
  53.163 -        CHAIN is chain letter in new file
  53.164 -        """
  53.165 -        tmp_file = NamedTemporaryFile(delete=False)
  53.166 -        tmp_file.close()
  53.167 -        
  53.168 -        for sequence, chain in self.sequences_chains():
  53.169 -            sequence.pdb_save(tmp_file.name, chain)
  53.170 -            # TODO: read from tmp_file.name
  53.171 -            # change CHAIN
  53.172 -            # add to out_file
  53.173 -        
  53.174 -        os.unlink(NamedTemporaryFile)
  53.175 -    
    54.1 --- a/lib/config.py	Mon Jan 24 21:40:10 2011 +0300
    54.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    54.3 @@ -1,18 +0,0 @@
    54.4 -
    54.5 -delta = 2.0 # for geometrical core building
    54.6 -minsize = 20 # min size of returning cores
    54.7 -maxabsent = 0.15 # deprecated?
    54.8 -
    54.9 -# pdb download url (XXXX is pdb code place)
   54.10 -pdb_url = 'http://www.pdb.org/pdb/files/%s.pdb'
   54.11 -pdb_dir = '/tmp/%s.pdb'
   54.12 -timeout = 10 # time in sec. for BRON-KERBOSH algorithm
   54.13 -
   54.14 -
   54.15 -# min part or new atoms in new alternative core
   54.16 -ac_new_atoms = 0.5
   54.17 -
   54.18 -# max number of cores (including main core)
   54.19 -ac_count = 5
   54.20 -
   54.21 -
    55.1 --- a/lib/fasta.py	Mon Jan 24 21:40:10 2011 +0300
    55.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    55.3 @@ -1,21 +0,0 @@
    55.4 -def save_fasta(out_file, string, name, description='', long_line=70):
    55.5 -    """ Saves given string to out_file in fasta_format 
    55.6 -    
    55.7 -    Splits long lines to substrings of length=long_line
    55.8 -    To prevent this, set long_line=None 
    55.9 -    """
   55.10 -    out_file.write(">%(name)s %(description)s \n" % {'name':name, 'description':description})        
   55.11 -    if long_line:
   55.12 -        for i in range(0, len(string) // long_line + 1):
   55.13 -            out_file.write("%s \n" % string[i*long_line : i*long_line + long_line])
   55.14 -    else:
   55.15 -        out_file.write("%s \n" % string)
   55.16 -
   55.17 -def determine_long_line(in_file):
   55.18 -    """ Returns maximum sequence line length in fasta file """
   55.19 -    sequences = in_file.read().split('>')
   55.20 -    for sequence in sequences[1:]:
   55.21 -        lines = sequence.split('\n')[1:]
   55.22 -        if len(lines) >= 2:
   55.23 -            return len(lines[0].strip())
   55.24 -    return 70
    56.1 --- a/lib/graph.py	Mon Jan 24 21:40:10 2011 +0300
    56.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    56.3 @@ -1,288 +0,0 @@
    56.4 -# -*- coding: utf-8 -*- 
    56.5 -
    56.6 -from datetime import datetime, timedelta
    56.7 -from copy import copy
    56.8 -
    56.9 -class TimeoutError(Exception):
   56.10 -    pass
   56.11 -
   56.12 -
   56.13 -
   56.14 -class Graph(object):
   56.15 -    """ Undirected weighted graph
   56.16 -    
   56.17 -    Data:
   56.18 -    nodes -- set of elements
   56.19 -    lines -- {line: cost}. 
   56.20 -        line is frozenset([e1, e2])
   56.21 -        cost is float in (0, 1] or 1 (if all lines are equal)
   56.22 -        
   56.23 -    >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1})
   56.24 -    >>> g.fast_cliques()
   56.25 -    Fast algorithm started
   56.26 -    [frozenset([1, 2]), frozenset([3])]
   56.27 -    >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1, frozenset([1,1]): 1})
   56.28 -    >>> g.fast_cliques()
   56.29 -    Fast algorithm started
   56.30 -    [frozenset([1, 2]), frozenset([3])]
   56.31 -    >>> g = Graph(set([1,2,3,4]), {frozenset([1,2]): 0.98, frozenset([1,3]): 0.98,
   56.32 -    ... frozenset([2,3]): 0.1, frozenset([1,1]): 1})
   56.33 -    >>> g.fast_cliques()
   56.34 -    Fast algorithm started
   56.35 -    [frozenset([1, 2, 3]), frozenset([4])]
   56.36 -    >>> g.bron_kerbosh()
   56.37 -    Bron and Kerbosh algorithm started
   56.38 -    [frozenset([1, 2, 3]), frozenset([4])]
   56.39 -    >>> g.cliques()
   56.40 -    Bron and Kerbosh algorithm started
   56.41 -    [frozenset([1, 2, 3])]
   56.42 -    """
   56.43 -    
   56.44 -    def __init__(self, nodes=None, lines=None):
   56.45 -        if not nodes:
   56.46 -            nodes = set()
   56.47 -        if not lines:
   56.48 -            lines = dict()
   56.49 -        self.nodes = set(nodes) # copy
   56.50 -        self.lines = {}
   56.51 -        for line, cost in lines.items():
   56.52 -            if len(line) == 2 and line.issubset(self.nodes):
   56.53 -                self.lines[line] = cost
   56.54 -        
   56.55 -    @staticmethod
   56.56 -    def line(k1, k2):
   56.57 -        """ Construct object, representing line of graph """
   56.58 -        return frozenset([k1, k2])
   56.59 -    
   56.60 -    def bounded(self, k1, k2):
   56.61 -        """ Return if these two nodes of the graph are bounded with line """
   56.62 -        return k1 == k2 or Graph.line(k1, k2) in self.lines
   56.63 -    
   56.64 -    def count_one(self, node):
   56.65 -        """ Returns number of connections of this node """
   56.66 -        return len([node1 for node1 in self.nodes if self.bounded(node, node1)]) - 1
   56.67 -    
   56.68 -    def cost_one(self, node):
   56.69 -        """ Returns sum of costs of all connections of this node """
   56.70 -        return sum([self.lines.get(Graph.line(node, node1), 0) 
   56.71 -        for node1 in self.nodes if node != node1])
   56.72 -    
   56.73 -    def count_all(self):
   56.74 -        """ Returns {node: number of connections of this node} """
   56.75 -        c = dict([(node, 0) for node in self.nodes])
   56.76 -        for line in self.lines:
   56.77 -            for node in line:
   56.78 -                c[node] += 1
   56.79 -        return c
   56.80 -    
   56.81 -    
   56.82 -    def drop_node(self, node):
   56.83 -        """ Remove node and all involved lines """
   56.84 -        for node1 in self.nodes:
   56.85 -            self.lines.pop(Graph.line(node, node1), None)
   56.86 -        self.nodes.discard(node)
   56.87 -    
   56.88 -    def add_node(self, node, parent_graph):
   56.89 -        """ Add node and corresponding lines from parent_graph
   56.90 -        
   56.91 -        Added lines should be contained in self graph
   56.92 -        (takes care of hanging lines)
   56.93 -        """
   56.94 -        self.nodes.add(node)
   56.95 -        for node1 in self.nodes:
   56.96 -            line = Graph.line(node, node1)
   56.97 -            if line in parent_graph.lines:
   56.98 -                self.lines[line] = parent_graph.lines[line]
   56.99 -    
  56.100 -    def drop_nodes(self, nodes):
  56.101 -        """ Run drop_node for each of given nodes
  56.102 -        
  56.103 -        Returns if nodes was not empty (ugly beauty)
  56.104 -        """
  56.105 -        for node in nodes:
  56.106 -            self.drop_node(node)
  56.107 -        return bool(nodes)
  56.108 -    
  56.109 -    def drop_if_count(self, minsize):
  56.110 -        """ Run drop_node for each node, that has less than minsize lines """
  56.111 -        while True:
  56.112 -            if not self.drop_nodes([node for (node, count) 
  56.113 -            in self.count_all().items() if count < minsize]):
  56.114 -                break
  56.115 -     
  56.116 -    def bron_kerbosh(self, timeout=-1, minsize=1):
  56.117 -        """ Bron and Kerboch algorithm implementation
  56.118 -        
  56.119 -        returns list of cliques
  56.120 -        clique is frozenset
  56.121 -        if timeout=-1, it means infinity
  56.122 -        if timeout has happened, raises TimeoutError
  56.123 -        
  56.124 -        lava flow
  56.125 -        """
  56.126 -        print 'Bron and Kerbosh algorithm started'
  56.127 -        cliques = []
  56.128 -        
  56.129 -        depth = 0
  56.130 -        list_candidates = [copy(self.nodes)]
  56.131 -        list_used = [set()]
  56.132 -        compsub = []
  56.133 -        
  56.134 -        start_time = datetime.now()
  56.135 -        timeout_timedelta = timedelta(timeout)
  56.136 -        
  56.137 -        while True: # ПОКА...
  56.138 -            if depth == -1:
  56.139 -                break  # ВСЕ! Все рекурсии (итерации) пройдены
  56.140 -            candidates = copy(list_candidates[depth])
  56.141 -            used = copy(list_used[depth])       
  56.142 -            if not candidates: # ПОКА candidates НЕ пусто
  56.143 -                depth -= 1
  56.144 -                if compsub:
  56.145 -                    compsub.pop()                
  56.146 -                continue
  56.147 -            
  56.148 -            # И used НЕ содержит вершины, СОЕДИНЕННОЙ СО ВСЕМИ вершинами из candidates
  56.149 -            # (все из used НЕ соединены хотя бы с 1 из candidates)
  56.150 -            used_candidates = False
  56.151 -        
  56.152 -            for used1 in used:
  56.153 -                for candidates1 in candidates:
  56.154 -                    if not self.bounded(used1, candidates1):
  56.155 -                        break
  56.156 -                else:
  56.157 -                    used_candidates = True             
  56.158 -
  56.159 -            if used_candidates:
  56.160 -                depth -= 1
  56.161 -
  56.162 -                if compsub:
  56.163 -                    compsub.pop()
  56.164 -                continue
  56.165 -            
  56.166 -            # Выбираем вершину v из candidates и добавляем ее в compsub
  56.167 -            v = candidates.pop()
  56.168 -            candidates.add(v)
  56.169 -            compsub.append(v)
  56.170 -            # Формируем new_candidates и new_used, удаляя из candidates и used вершины, НЕ соединенные с v
  56.171 -            # (то есть, оставляя только соединенные с v)
  56.172 -            new_candidates = set()
  56.173 -            for candidates1 in candidates:
  56.174 -                if self.bounded(candidates1, v) and candidates1 != v:
  56.175 -                    new_candidates.add(candidates1)
  56.176 -
  56.177 -            new_used = set()
  56.178 -            for used1 in used:
  56.179 -                if self.bounded(used1, v) and used1 != v:
  56.180 -                    new_used.add(used1)
  56.181 -            
  56.182 -            # Удаляем v из candidates и помещаем в used
  56.183 -            list_candidates[depth].remove(v)    
  56.184 -            list_used[depth].add(v)
  56.185 -            # ЕСЛИ new_candidates и new_used пусты
  56.186 -            if not new_candidates and not new_used:
  56.187 -                # compsub ? клика
  56.188 -                if len(compsub) >= minsize:
  56.189 -                    cliques.append(frozenset(compsub))
  56.190 -            else:
  56.191 -                # ИНАЧЕ рекурсивно вызываем bron_kerbosh(new_candidates, new_used)
  56.192 -                depth += 1
  56.193 -
  56.194 -                # TIMEOUT check start
  56.195 -                if timeout != -1:
  56.196 -                    if datetime.now() - start_time > timeout_timedelta:
  56.197 -                        raise TimeoutError
  56.198 -                # TIMEOUT check end
  56.199 -                
  56.200 -                if depth >= len(list_candidates):
  56.201 -                    list_candidates.append(set())
  56.202 -                    list_used.append(set())
  56.203 -
  56.204 -                list_candidates[depth] = copy(new_candidates)
  56.205 -                list_used[depth] = copy(new_used)
  56.206 -                
  56.207 -                continue
  56.208 -            
  56.209 -            # Удаляем v из compsub
  56.210 -            if compsub:
  56.211 -                compsub.pop()
  56.212 -                
  56.213 -        return cliques
  56.214 -
  56.215 -    
  56.216 -    def fast_cliques(self, minsize=1):
  56.217 -        """ returns list of cliques
  56.218 -        
  56.219 -        clique is frozenset
  56.220 -        """
  56.221 -        print 'Fast algorithm started'
  56.222 -        cliques = []
  56.223 -        
  56.224 -        while True:
  56.225 -            graph = Graph(self.nodes, self.lines)
  56.226 -            for clique in cliques:
  56.227 -                graph.drop_nodes(clique)
  56.228 -            if not graph.nodes:
  56.229 -                break
  56.230 -            
  56.231 -            while True: 
  56.232 -                # drop nodes, while its is possible
  56.233 -                if len(graph.nodes) == 1:
  56.234 -                    break
  56.235 -                c = graph.count_all()
  56.236 -                min_count = min(c.values())
  56.237 -                bad_nodes = [node for (node, count) in c.items() if count == min_count]
  56.238 -                if len(bad_nodes) == len(graph.nodes) and min_count != 0:
  56.239 -                    break
  56.240 -                
  56.241 -                costs = dict([(node, graph.cost_one(node)) for node in bad_nodes])
  56.242 -                min_cost = min(costs.values())
  56.243 -                for node, cost in costs.items():
  56.244 -                    if cost == min_cost:
  56.245 -                        graph.drop_node(node)
  56.246 -                        break
  56.247 -            
  56.248 -            while True:
  56.249 -                # add nodes, while its is possible
  56.250 -                candidats = {}
  56.251 -                for node in self.nodes:
  56.252 -                    c = len([i for i in graph.nodes if self.bounded(node, i)])
  56.253 -                    if c == len(self.nodes):
  56.254 -                        graph1 = Graph(graph.nodes, graph.lines)
  56.255 -                        graph1.add_node(node, self)
  56.256 -                        candidats[node] = graph1.cost_one(node)
  56.257 -                if not candidats:
  56.258 -                    break
  56.259 -                
  56.260 -                max_cost = max(candidats.values())
  56.261 -                node = [node for (node, cost) in candidats.items() if cost == max_cost][0]
  56.262 -                graph.add_node(node, self)
  56.263 -                
  56.264 -            cliques.append(frozenset(graph.nodes))
  56.265 -            
  56.266 -        return cliques
  56.267 -        
  56.268 -    
  56.269 -    def cliques(self, timeout=-1, minsize=1):
  56.270 -        """ returns length-sorted list of cliques
  56.271 -        
  56.272 -        clique is frozenset
  56.273 -        
  56.274 -        can change self!
  56.275 -        
  56.276 -        try to execute bron_kerbosh
  56.277 -        if it raises TimeoutError, executes fast_cliques
  56.278 -        """
  56.279 -        
  56.280 -        self.drop_if_count(minsize)
  56.281 -        
  56.282 -        try:
  56.283 -            cliques = self.bron_kerbosh(timeout, minsize)
  56.284 -            cliques.sort(key=lambda clique: len(clique), reverse=True)
  56.285 -        except TimeoutError:
  56.286 -            cliques = self.fast_cliques(minsize)
  56.287 -        return cliques
  56.288 -
  56.289 -if __name__ == "__main__":
  56.290 -    import doctest
  56.291 -    doctest.testmod()
    57.1 --- a/lib/monomer.py	Mon Jan 24 21:40:10 2011 +0300
    57.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    57.3 @@ -1,88 +0,0 @@
    57.4 -#!/usr/bin/python
    57.5 -
    57.6 -from allpy_data.AAdict import AAdict
    57.7 -import Bio.PDB
    57.8 -
    57.9 -index_code3 = {}
   57.10 -index_code1_protein = {}
   57.11 -index_name = {}
   57.12 -
   57.13 -
   57.14 -class MonomerType(object):
   57.15 -    """ Monomer type
   57.16 -    
   57.17 -    name -- string like "Valine" 
   57.18 -    code1 -- one-letter code (in upper case)
   57.19 -    code3 -- three-letter code (in upper case)
   57.20 -    is_modified -- True of False
   57.21 -    """
   57.22 -    def __init__(self, name, code1, code3, is_modified=False):
   57.23 -        self.name = name.capitalize()
   57.24 -        self.code1 = code1.upper()
   57.25 -        self.code3 = code3.upper()
   57.26 -        self.is_modified = bool(is_modified) # ugly
   57.27 -        
   57.28 -        index_name[self.name] = self
   57.29 -        index_code3[self.code3] = self
   57.30 -    
   57.31 -    @staticmethod
   57.32 -    def from_code3(code3):
   57.33 -        return index_code3[code3.upper()]
   57.34 -    @staticmethod
   57.35 -    def from_name(name):
   57.36 -        return index_name[name.capitalize()]
   57.37 -      
   57.38 -    @staticmethod
   57.39 -    def from_pdb_residue(pdb_residue):
   57.40 -        return MonomerType.from_code3(pdb_residue.get_resname())
   57.41 -        
   57.42 -    # TO DISCUSS
   57.43 -    def __eq__(self, other):
   57.44 -        return self.code1 == other.code1
   57.45 -
   57.46 -    def __ne__(self, other):
   57.47 -        return not (self == other)
   57.48 -
   57.49 -
   57.50 -class Monomer(object):
   57.51 -    """ Monomer 
   57.52 -    
   57.53 -    type -- link to MonomerType object
   57.54 -    """
   57.55 -    def __init__(self, monomer_type):
   57.56 -        self.type = monomer_type
   57.57 -        
   57.58 -    def __eq__(self, other):
   57.59 -        return self.type == other.type
   57.60 -
   57.61 -    def __ne__(self, other):
   57.62 -        return not (self == other)
   57.63 -
   57.64 -class AminoAcidType(MonomerType):
   57.65 -    def __init__(self, name, code1, code3, is_modified=False):
   57.66 -        MonomerType.__init__(self, name, code1, code3, is_modified)
   57.67 -        if not is_modified:
   57.68 -            index_code1_protein[self.code1] = self
   57.69 -
   57.70 -    @staticmethod
   57.71 -    def from_code1(code1):
   57.72 -        return index_code1_protein[code1.upper()]
   57.73 -    def instance(self):
   57.74 -        """ Returns new AminoAcid object of this type """
   57.75 -        return AminoAcid(self)
   57.76 -
   57.77 -
   57.78 -class AminoAcid(Monomer):
   57.79 -    """  Amino acid """
   57.80 -    pass
   57.81 -    
   57.82 -
   57.83 -# prepare all aminoacids
   57.84 -
   57.85 -for code3, data in AAdict.items():
   57.86 -    code1, m_type, is_modified, none, name = data
   57.87 -    if m_type == 'p':
   57.88 -        AminoAcidType(name, code1, code3, is_modified)
   57.89 -
   57.90 -del code3, data, code1, m_type, is_modified, none, name
   57.91 -
    58.1 --- a/lib/project.py	Mon Jan 24 21:40:10 2011 +0300
    58.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    58.3 @@ -1,255 +0,0 @@
    58.4 -#!/usr/bin/python
    58.5 -
    58.6 -"""
    58.7 -    "I will not use abbrev."
    58.8 -    "I will always finish what I st"    
    58.9 -                                            - Bart Simpson
   58.10 -
   58.11 -"""
   58.12 -
   58.13 -import sequence
   58.14 -Sequence = sequence.Sequence
   58.15 -from monomer import AminoAcidType
   58.16 -import allpy_data
   58.17 -from tempfile import NamedTemporaryFile
   58.18 -import os
   58.19 -import block
   58.20 -from fasta import save_fasta
   58.21 -
   58.22 -Block = block.Block
   58.23 -
   58.24 -class Project(object):
   58.25 -    """ Alignment representing class
   58.26 -    
   58.27 -    Mandatory data:
   58.28 -    *   sequences -- list of Sequence objects. Sequences don't contain gaps
   58.29 -         - see sequence.py module
   58.30 -    *   alignment -- dict 
   58.31 -        {<Sequence object>:[<Monomer object>,None,<Monomer object>]}
   58.32 -        keys are the Sequence objects, values are the lists, which
   58.33 -        contain monomers of those sequences or None for gaps in the
   58.34 -        corresponding sequence of
   58.35 -        alignment
   58.36 -
   58.37 -    """
   58.38 -    def __init__(self, *args):
   58.39 -        """overloaded constructor
   58.40 -
   58.41 -        Project() -> new empty Project
   58.42 -        Project(sequences, alignment) -> new Project with sequences and
   58.43 -            alignment initialized from arguments
   58.44 -        Project(fasta_file) -> new Project, read alignment and sequences
   58.45 -             from fasta file 
   58.46 -
   58.47 -        """
   58.48 -        if len(args)>1:#overloaded constructor
   58.49 -            self.sequences=args[0]
   58.50 -            self.alignment=args[1]
   58.51 -        elif len(args)==0:
   58.52 -            self.sequences=[]
   58.53 -            self.alignment={}
   58.54 -        else:
   58.55 -            self.sequences,self.alignment=Project.from_fasta(args[0])
   58.56 -
   58.57 -    def __len__(self):
   58.58 -        """ Returns width, ie length of each sequence with gaps """
   58.59 -        return max([len(line) for line in self.alignment.values()])
   58.60 -
   58.61 -    def thickness(self):
   58.62 -        """ The number of sequences in alignment (it's thickness). """
   58.63 -        return len(self.alignment)
   58.64 -       
   58.65 -    def calc_identity(self):
   58.66 -        """ Calculate the identity of alignment positions for colouring.
   58.67 -
   58.68 -        For every (row, column) in alignment the percentage of the exactly
   58.69 -        same residue in the same column in the alignment is calculated.
   58.70 -        The data structure is just like the Project.alignment, but istead of 
   58.71 -        monomers it contains float percentages.
   58.72 -        """
   58.73 -        # Oh, God, that's awful! Absolutely not understandable.
   58.74 -        # First, calculate percentages of amino acids in every column
   58.75 -        contribution = 1.0 / len(self.sequences)
   58.76 -        all_columns = []
   58.77 -        for position in range(len(self)):
   58.78 -            column_percentage = {}
   58.79 -            for seq in self.alignment:
   58.80 -                if self.alignment[seq][position] is not None:
   58.81 -                    aa = self.alignment[seq][position].code
   58.82 -                else:
   58.83 -                    aa = None
   58.84 -                if aa in allpy_data.amino_acids:
   58.85 -                    if aa in column_percentage.keys():
   58.86 -                        column_percentage[aa] += contribution
   58.87 -                    else:
   58.88 -                        column_percentage[aa] = contribution
   58.89 -            all_columns.append(column_percentage)
   58.90 -        # Second, map these percentages onto the alignment
   58.91 -        self.identity_percentages = {}
   58.92 -        for seq in self.sequences:
   58.93 -            self.identity_percentages[seq] = []
   58.94 -        for seq in self.identity_percentages:
   58.95 -            line = self.identity_percentages[seq]
   58.96 -            for position in range(len(self)):
   58.97 -                if self.alignment[seq][position] is not None:
   58.98 -                    aa = self.alignment[seq][position].code
   58.99 -                else:
  58.100 -                    aa = None
  58.101 -                line.append(all_columns[position].get(aa))
  58.102 -        return self.identity_percentages
  58.103 -
  58.104 -    @staticmethod
  58.105 -    def from_fasta(file, monomer_kind=AminoAcidType):
  58.106 -        """ Import data from fasta file
  58.107 -        
  58.108 -        monomer_kind is class, inherited from MonomerType
  58.109 -        
  58.110 -        >>> import project
  58.111 -        >>> sequences,alignment=project.Project.from_fasta(open("test.fasta"))       
  58.112 -        """
  58.113 -        import re
  58.114 -
  58.115 -        sequences = []
  58.116 -        alignment = {}
  58.117 -
  58.118 -        raw_sequences = file.read().split(">")
  58.119 -        if len(raw_sequences) <= 1:
  58.120 -            raise Exception("Wrong format of fasta-file %s" % file.name)
  58.121 -        
  58.122 -        raw_sequences = raw_sequences[1:] #ignore everything before the first >
  58.123 -        for raw in raw_sequences:
  58.124 -            parsed_raw_sequence = raw.split("\n")
  58.125 -            parsed_raw_sequence = [s.strip() for s in parsed_raw_sequence]
  58.126 -            name_and_description = parsed_raw_sequence[0]
  58.127 -            name_and_description = name_and_description.split(" ",1)
  58.128 -            if len(name_and_description) == 2:
  58.129 -                name, description = name_and_description
  58.130 -            elif len(name_and_description) == 1: 
  58.131 -                #if there is description
  58.132 -                name = name_and_description[0]
  58.133 -                description = ''
  58.134 -            else:
  58.135 -                raise Exception("Wrong name of sequence %(name)$ fasta-file %(file)s" % \
  58.136 -                {'name': name, 'file': file.name})
  58.137 -            
  58.138 -            if len(parsed_raw_sequence) <= 1:
  58.139 -                raise Exception("Wrong format of sequence %(name)$ fasta-file %(file)s" % \
  58.140 -                {'name': name, 'file': file.name})
  58.141 -            string = ""
  58.142 -            for piece in parsed_raw_sequence[1:]:
  58.143 -                piece_without_whitespace_chars = re.sub("\s", "", piece)
  58.144 -                string += piece_without_whitespace_chars
  58.145 -            monomers = [] #convert into Monomer objects
  58.146 -            alignment_list = [] #create the respective list in alignment dict
  58.147 -            for current_monomer in string:
  58.148 -                if current_monomer not in ["-", ".", "~"]:
  58.149 -                    monomers.append(monomer_kind.from_code1(current_monomer).instance())
  58.150 -                    alignment_list.append(monomers[-1])
  58.151 -                else:
  58.152 -                    alignment_list.append(None)
  58.153 -            sequence = Sequence(monomers, name, description)
  58.154 -            sequences.append(sequence)
  58.155 -            alignment[sequence] = alignment_list
  58.156 -        return sequences, alignment
  58.157 -    
  58.158 -    
  58.159 -    @staticmethod
  58.160 -    def from_sequences(*sequences):
  58.161 -        """  Constructs new alignment from sequences
  58.162 -        
  58.163 -        Add None's to right end to make equal lengthes of alignment sequences 
  58.164 -        """
  58.165 -        project = Project()
  58.166 -        project.sequences = sequences
  58.167 -        max_length = max(len(sequence) for sequence in sequences)
  58.168 -        for sequence in sequences:
  58.169 -            gaps_count = max_length - len(sequence)
  58.170 -            project.alignment[sequence] = sequence.monomers + [None] * gaps_count
  58.171 -        return project
  58.172 -    
  58.173 -    def save_fasta(self, out_file, long_line=70, gap='-'):
  58.174 -        """ Saves alignment to given file
  58.175 -        
  58.176 -        Splits long lines to substrings of length=long_line
  58.177 -        To prevent this, set long_line=None 
  58.178 -        """
  58.179 -        Block(self).save_fasta(out_file, long_line=long_line, gap=gap)
  58.180 -        
  58.181 -    def muscle_align(self):
  58.182 -        """ Simple align ths alignment using sequences (muscle)
  58.183 -        
  58.184 -        uses old Monomers and Sequences objects
  58.185 -        """
  58.186 -        tmp_file = NamedTemporaryFile(delete=False)
  58.187 -        self.save_fasta(tmp_file)
  58.188 -        tmp_file.close()
  58.189 -        os.system("muscle -in %(tmp)s -out %(tmp)s" % {'tmp': tmp_file.name})
  58.190 -        sequences, alignment = Project.from_fasta(open(tmp_file.name))
  58.191 -        for sequence in self.sequences:
  58.192 -            try:
  58.193 -                new_sequence = [i for i in sequences if sequence==i][0]
  58.194 -            except:
  58.195 -                raise Exception("Align: Cann't find sequence %s in muscle output" % \
  58.196 -                sequence.name)
  58.197 -            old_monomers = iter(sequence.monomers)
  58.198 -            self.alignment[sequence] = []
  58.199 -            for monomer in alignment[new_sequence]:
  58.200 -                if not monomer:
  58.201 -                    self.alignment[sequence].append(monomer)
  58.202 -                else:
  58.203 -                    old_monomer = old_monomers.next()
  58.204 -                    if monomer != old_monomer:
  58.205 -                        raise Exception("Align: alignment errors")
  58.206 -                    self.alignment[sequence].append(old_monomer)
  58.207 -        os.unlink(tmp_file.name)
  58.208 -        
  58.209 -        
  58.210 -    def column(self, sequence=None, sequences=None, original=None):
  58.211 -        """ returns list of columns of alignment
  58.212 -        
  58.213 -        sequence or sequences:
  58.214 -            if sequence is given, then column is (original_monomer, monomer)
  58.215 -            if sequences is given, then column is (original_monomer, {sequence: monomer}) 
  58.216 -            if both of them are given, it is an error
  58.217 -        original (Sequence type):
  58.218 -            if given, this filters only columns represented by original sequence
  58.219 -        """
  58.220 -        if sequence and sequences:
  58.221 -            raise Exception("Wrong usage. read help")
  58.222 -        indexes = dict([(v, k) for( k, v) in enumerate(self.sequences)])
  58.223 -        alignment = self.alignment.items()
  58.224 -        alignment.sort(key=lambda i: indexes[i[0]])
  58.225 -        alignment = [monomers for seq, monomers in alignment]
  58.226 -        for column in zip(*alignment):
  58.227 -            if not original or column[indexes[original]]:
  58.228 -                if sequence:
  58.229 -                    yield (column[indexes[original]], column[indexes[sequence]])
  58.230 -                else:
  58.231 -                    yield (column[indexes[original]], 
  58.232 -                    dict([(s, column[indexes[s]]) for s in sequences]))
  58.233 -        
  58.234 -    def pdb_auto_add(self, conformity_file=None):
  58.235 -        """ Adds pdb information to each sequence
  58.236 -        
  58.237 -        TODO: conformity_file
  58.238 -        """
  58.239 -        conformity = {}
  58.240 -        
  58.241 -        for sequence in self.sequences:
  58.242 -            try:
  58.243 -                sequence.pdb_auto_add(conformity.get(sequence.name, None))
  58.244 -            except Exception, t:
  58.245 -                print "Cann't add pdb information about chain %s:" % sequence.name
  58.246 -                print t
  58.247 -        
  58.248 -    def secstr(self, secuence, pdb_chain, gap='-'):
  58.249 -        """ Returns string representing secondary structure """
  58.250 -        return ''.join([
  58.251 -        (secuence.pdb_secstr[pdb_chain][m] if secuence.secstr_has(pdb_chain, m) else gap) 
  58.252 -        for m in self.alignment[secuence]])
  58.253 -    
  58.254 -    def save_secstr(self, out_file, secuence, pdb_chain, 
  58.255 -    name, description='', gap='-', long_line=70):
  58.256 -        """ Save secondary structure and name in fasta format """
  58.257 -        save_fasta(out_file, self.secstr(secuence, pdb_chain, gap), name, description, long_line)
  58.258 -    
    59.1 --- a/lib/sequence.py	Mon Jan 24 21:40:10 2011 +0300
    59.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    59.3 @@ -1,199 +0,0 @@
    59.4 -#!/usr/bin/python
    59.5 -# -*- coding: utf-8 -*- 
    59.6 -
    59.7 -from monomer import AminoAcidType
    59.8 -from Bio.PDB import CaPPBuilder, PDBIO
    59.9 -from Bio.PDB.DSSP import make_dssp_dict
   59.10 -from allpy_pdb import std_id, pdb_id_parse, get_structure
   59.11 -import project
   59.12 -import sys
   59.13 -import config
   59.14 -import os.path
   59.15 -import urllib2
   59.16 -from tempfile import NamedTemporaryFile
   59.17 -import os
   59.18 -
   59.19 -
   59.20 -class Sequence(object):
   59.21 -    """ Sequence of Monomers
   59.22 -    
   59.23 -    Mandatory data:
   59.24 -    *   name -- str with the name of sequence
   59.25 -    *   description -- str with description of the sequence
   59.26 -    *   monomers -- list of monomer objects (aminoacids or nucleotides)
   59.27 -    *   pdb_chains -- list of Bio.PDB.Chain's
   59.28 -    *   pdb_files -- dictionary like {Bio.PDB.Chain: file_obj}
   59.29 -    
   59.30 -    *   pdb_residues -- dictionary like {Bio.PDB.Chain: {Monomer: Bio.PDB.Residue}}
   59.31 -    *   pdb_secstr -- dictionary like {Bio.PDB.Chain: {Monomer: 'Secondary structure'}}
   59.32 -            Code   Secondary structure
   59.33 -            H      ?-helix
   59.34 -            B      Isolated ?-bridge residue
   59.35 -            E      Strand
   59.36 -            G      3-10 helix
   59.37 -            I      ?-helix
   59.38 -            T      Turn
   59.39 -            S      Bend
   59.40 -            -      Other
   59.41 -    
   59.42 -    
   59.43 -    ?TODO: global pdb_structures 
   59.44 -    """
   59.45 -    def __init__(self, monomers=None, name='', description=""):
   59.46 -        if not monomers:
   59.47 -            monomers = []
   59.48 -        self.name = name
   59.49 -        self.description = description
   59.50 -        self.monomers = monomers 
   59.51 -        self.pdb_chains = []
   59.52 -        self.pdb_files = {}
   59.53 -        self.pdb_residues = {}
   59.54 -        self.pdb_secstr = {}
   59.55 -        
   59.56 -    def __len__(self):
   59.57 -        return len(self.monomers)
   59.58 -    
   59.59 -    def __str__(self):
   59.60 -        """ Returns sequence in one-letter code """
   59.61 -        return ''.join([monomer.type.code1 for monomer in self.monomers])
   59.62 -        
   59.63 -    def __eq__(self, other):
   59.64 -        """ Returns if all corresponding monomers of this sequences are equal
   59.65 -        
   59.66 -        If lengths of sequences are not equal, returns False
   59.67 -        """ 
   59.68 -        return len(self) == len(other) and \
   59.69 -        all([a==b for a, b in zip(self.monomers, other.monomers)])
   59.70 -    
   59.71 -    def __ne__(self, other):
   59.72 -        return not (self == other)
   59.73 -    
   59.74 -    def pdb_chain_add(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
   59.75 -        """ Reads Pdb chain from file 
   59.76 -        
   59.77 -        and align each Monomer with PDB.Residue (TODO)
   59.78 -        """
   59.79 -        name = std_id(pdb_id, pdb_chain, pdb_model)
   59.80 -        structure = get_structure(pdb_file, name)
   59.81 -        try:
   59.82 -            chain = structure[pdb_model][pdb_chain]
   59.83 -        except:
   59.84 -            chain = list(list(structure)[0])[0]
   59.85 -        self.pdb_chains.append(chain)
   59.86 -        self.pdb_residues[chain] = {}
   59.87 -        self.pdb_secstr[chain] = {}
   59.88 -        pdb_sequence = Sequence.from_pdb_chain(chain)
   59.89 -        alignment = project.Project.from_sequences(self, pdb_sequence)
   59.90 -        alignment.muscle_align()
   59.91 -        for monomer, pdb_monomer in alignment.column(sequence=pdb_sequence, original=self):
   59.92 -            if pdb_sequence.pdb_has(chain, pdb_monomer):
   59.93 -                residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
   59.94 -                self.pdb_residues[chain][monomer] = residue
   59.95 -        self.pdb_files[chain] = pdb_file
   59.96 -    
   59.97 -    def pdb_unload(self):
   59.98 -        """ Delete all pdb-connected links """
   59.99 -        #~ gc.get_referrers(self.pdb_chains[0])
  59.100 -        self.pdb_chains = []
  59.101 -        self.pdb_residues = {}
  59.102 -        self.pdb_secstr = {} # FIXME
  59.103 -        self.pdb_files = {} # FIXME
  59.104 -    
  59.105 -    @staticmethod
  59.106 -    def from_str(fasta_str, name='', description='', monomer_kind=AminoAcidType):
  59.107 -        """ Import data from one-letter code
  59.108 -        
  59.109 -        monomer_kind is class, inherited from MonomerType
  59.110 -        """
  59.111 -        monomers = [monomer_kind.from_code1(aa).instance() for aa in fasta_str]
  59.112 -        return Sequence(monomers, name, description)
  59.113 -
  59.114 -    @staticmethod
  59.115 -    def from_pdb_chain(chain):
  59.116 -        """ Returns Sequence with Monomers with link to Bio.PDB.Residue
  59.117 -        
  59.118 -        chain is Bio.PDB.Chain
  59.119 -        """
  59.120 -        cappbuilder = CaPPBuilder()
  59.121 -        peptides = cappbuilder.build_peptides(chain)
  59.122 -        sequence = Sequence()
  59.123 -        sequence.pdb_chains = [chain]
  59.124 -        sequence.pdb_residues[chain] = {}
  59.125 -        sequence.pdb_secstr[chain] = {}
  59.126 -        for peptide in peptides:
  59.127 -            for ca_atom in peptide.get_ca_list():
  59.128 -                residue = ca_atom.get_parent()
  59.129 -                try:
  59.130 -                    monomer = AminoAcidType.from_pdb_residue(residue).instance()
  59.131 -                    sequence.pdb_residues[chain][monomer] = residue
  59.132 -                    sequence.monomers.append(monomer)
  59.133 -                except:
  59.134 -                    print "Warning: unknown monomer in PDB: %s" % residue
  59.135 -        return sequence
  59.136 -    
  59.137 -    def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):
  59.138 -        """ Adds pdb information to each monomer
  59.139 -        
  59.140 -        Returns if information has been successfully added
  59.141 -        TODO: conformity_file
  59.142 -        
  59.143 -        id-format lava flow
  59.144 -        """
  59.145 -        if not conformity_info:
  59.146 -            path = os.path.join(pdb_directory, self.name)
  59.147 -            if os.path.exists(path) and os.path.getsize(path):
  59.148 -                match = pdb_id_parse(self.name)
  59.149 -                self.pdb_chain_add(open(path), match['code'], 
  59.150 -                match['chain'], match['model'])
  59.151 -            else:
  59.152 -                match = pdb_id_parse(self.name)
  59.153 -                if match:
  59.154 -                    code = match['code']
  59.155 -                    pdb_filename = config.pdb_dir % code
  59.156 -                    if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):
  59.157 -                        url = config.pdb_url % code
  59.158 -                        print "Download %s" % url
  59.159 -                        pdb_file = open(pdb_filename, 'w')
  59.160 -                        data = urllib2.urlopen(url).read()
  59.161 -                        pdb_file.write(data)
  59.162 -                        pdb_file.close()
  59.163 -                        print "Save %s" % pdb_filename
  59.164 -                    pdb_file = open(pdb_filename)
  59.165 -                    self.pdb_chain_add(pdb_file, code, match['chain'], match['model'])
  59.166 -        
  59.167 -    def pdb_save(self, out_filename, pdb_chain):
  59.168 -        """ Saves pdb_chain to out_file """
  59.169 -        class GlySelect(Select):
  59.170 -            def accept_chain(self, chain):
  59.171 -                if chain == pdb_chain:
  59.172 -                    return 1
  59.173 -                else:
  59.174 -                    return 0
  59.175 -        io = PDBIO()
  59.176 -        structure = chain.get_parent()
  59.177 -        io.set_structure(structure)
  59.178 -        io.save(out_filename, GlySelect())
  59.179 -        
  59.180 -    
  59.181 -    def pdb_add_sec_str(self, pdb_chain):
  59.182 -        """ Add secondary structure data """
  59.183 -        tmp_file = NamedTemporaryFile(delete=False)
  59.184 -        tmp_file.close()
  59.185 -        pdb_file = self.pdb_files[pdb_chain].name
  59.186 -        os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name})
  59.187 -        dssp, keys = make_dssp_dict(tmp_file.name)
  59.188 -        for monomer in self.monomers:
  59.189 -            if self.pdb_has(pdb_chain, monomer):
  59.190 -                residue = self.pdb_residues[pdb_chain][monomer]
  59.191 -                try:
  59.192 -                    d = dssp[(pdb_chain.get_id(), residue.get_id())]
  59.193 -                    self.pdb_secstr[pdb_chain][monomer] = d[1]
  59.194 -                except:
  59.195 -                    print "No dssp information about %s at %s" % (monomer, pdb_chain)
  59.196 -        os.unlink(tmp_file.name)
  59.197 -    
  59.198 -    def pdb_has(self, chain, monomer):
  59.199 -        return chain in self.pdb_residues and monomer in self.pdb_residues[chain]
  59.200 -    
  59.201 -    def secstr_has(self, chain, monomer):
  59.202 -        return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]
    60.1 --- a/pytale/dummy_pytale.py	Mon Jan 24 21:40:10 2011 +0300
    60.2 +++ b/pytale/dummy_pytale.py	Tue Jan 25 16:03:00 2011 +0300
    60.3 @@ -11,7 +11,7 @@
    60.4      1) MenuBar
    60.5      2) Resizable panel with ListBox containing sequence names
    60.6      3) Panel with RichTextBox containing the alignment itself
    60.7 -    4) StatusBar 
    60.8 +    4) StatusBar
    60.9      """
   60.10      def __init__(self, parent, title, size=(1000, 600)):
   60.11          global settings
   60.12 @@ -140,7 +140,7 @@
   60.13                  end = self.text.XYToPosition(column+1, line)
   60.14                  self.text.SetStyle(start, end, self.settings.conservation_styles[conser])
   60.15          print 'colouring done'
   60.16 -                
   60.17 +
   60.18  
   60.19  class Settings(object):
   60.20      def __init__(self):
    61.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    61.2 +++ b/repeats/config.py	Tue Jan 25 16:03:00 2011 +0300
    61.3 @@ -0,0 +1,1 @@
    61.4 +min_intersection = 15
    62.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    62.2 +++ b/repeats/repeat_joiner.py	Tue Jan 25 16:03:00 2011 +0300
    62.3 @@ -0,0 +1,262 @@
    62.4 +""" Lib to join pair repeats
    62.5 +
    62.6 +Throuot: [from, to)
    62.7 +from < to
    62.8 +ori: True for '+'-strand, - for '-'-strand
    62.9 +"""
   62.10 +
   62.11 +import Queue
   62.12 +
   62.13 +from bx.intervals.intersection import Intersecter
   62.14 +
   62.15 +import config
   62.16 +
   62.17 +class Chromosome(Intersecter):
   62.18 +    """ non-cycled, linear chromosome
   62.19 +
   62.20 +    Data:
   62.21 +        name
   62.22 +    """
   62.23 +    def __init__(self, name):
   62.24 +        self.name = name
   62.25 +
   62.26 +    def __repr__(self):
   62.27 +        return self.name
   62.28 +
   62.29 +    __str__ = __repr__
   62.30 +
   62.31 +class Interval(object):
   62.32 +    """ individual interval
   62.33 +
   62.34 +    start
   62.35 +    end
   62.36 +
   62.37 +    chromosome
   62.38 +    ori
   62.39 +    sib === thick_edge -- paired interval
   62.40 +
   62.41 +    in interval_group (installes in init):
   62.42 +        group_real_start -- position of real_start() of this interval in group
   62.43 +        group_ori -- if (position of real_start() of this interval in the group)
   62.44 +                        < (position of real_end())
   62.45 +    """
   62.46 +
   62.47 +    def __init__(self, repeat_joiner, chromosome_name, pos_from, pos_to, ori=True):
   62.48 +        repeat_joiner._intervals.append(self)
   62.49 +        self.start = pos_from
   62.50 +        self.end = pos_to
   62.51 +        self.ori = ori
   62.52 +        self.group_real_start = None # for repr
   62.53 +        self.group_ori = None
   62.54 +        if chromosome_name not in repeat_joiner:
   62.55 +            repeat_joiner[chromosome_name] = Chromosome(chromosome_name)
   62.56 +        self.chromosome = repeat_joiner[chromosome_name]
   62.57 +        repeat_joiner[chromosome_name].insert_interval(self)
   62.58 +
   62.59 +    @staticmethod
   62.60 +    def pair(interval_1, interval_2):
   62.61 +        """ connect these interval with thick edge """
   62.62 +        interval_1.sib = interval_2
   62.63 +        interval_2.sib = interval_1
   62.64 +
   62.65 +    def __eq__(self, other):
   62.66 +        return self.chromosome == other.chromosome and \
   62.67 +                self.start == other.start and \
   62.68 +                self.end == other.end and \
   62.69 +                self.ori == other.ori
   62.70 +
   62.71 +    def __ne__(self, other):
   62.72 +        return not (self == other)
   62.73 +
   62.74 +    def tuple(self):
   62.75 +        return (self.start, self.end, self.ori, self.chromosome)
   62.76 +
   62.77 +    def thin_edges(self, min_intersection=config.min_intersection):
   62.78 +        """ return all thin edges (intersected intervals) """
   62.79 +        min_intersection -= 1 # bx intersecter already means intersection of length 1
   62.80 +        if len(self) < min_intersection:
   62.81 +            return []
   62.82 +        pos_start = self.start + min_intersection
   62.83 +        pos_end = self.end - min_intersection
   62.84 +        intersected = []
   62.85 +        if pos_end <= pos_start:
   62.86 +            pos_end, pos_start = pos_start, pos_end
   62.87 +            # fragments intersecting both pos_end and pos_start, whole [pos_start-1, pos_end+1)
   62.88 +            left = self.chromosome.find(pos_start-1, pos_start)
   62.89 +            right = self.chromosome.find(pos_end, pos_end+1)
   62.90 +            intersected = set(left) & set(right)
   62.91 +        else:
   62.92 +            intersected = self.chromosome.find(pos_start, pos_end)
   62.93 +        return filter(lambda r: r != self, intersected)
   62.94 +
   62.95 +    def edges(self, min_intersection=config.min_intersection):
   62.96 +        """ visit thick edge at first and then all thin edges """
   62.97 +        return [self.sib] + self.thin_edges(min_intersection)
   62.98 +
   62.99 +    @property
  62.100 +    def real_start(self):
  62.101 +        """ real start of interval (depends on ori) """
  62.102 +        if self.ori:
  62.103 +            return self.start
  62.104 +        else:
  62.105 +            return self.end - 1
  62.106 +
  62.107 +    @property
  62.108 +    def real_end(self):
  62.109 +        """ real end of interval (depends on ori) """
  62.110 +        if self.ori:
  62.111 +            return self.end
  62.112 +        else:
  62.113 +            return self.start - 1
  62.114 +
  62.115 +    def __len__(self):
  62.116 +        """ length of interval """
  62.117 +        return self.end - self.start
  62.118 +
  62.119 +    @property
  62.120 +    def group_real_end(self):
  62.121 +        """ reletive end of interval """
  62.122 +        if self.group_ori:
  62.123 +            return self.group_real_start + len(self)
  62.124 +        else:
  62.125 +            return self.group_real_start - len(self)
  62.126 +
  62.127 +    @property
  62.128 +    def group_start(self):
  62.129 +        """ min of interval positions in group  """
  62.130 +        if self.group_ori:
  62.131 +            return self.group_real_start
  62.132 +        else:
  62.133 +            return self.group_real_end + 1
  62.134 +
  62.135 +    @property
  62.136 +    def group_end(self):
  62.137 +        """ max of interval positions in group  """
  62.138 +        if self.group_ori:
  62.139 +            return self.group_real_end
  62.140 +        else:
  62.141 +            return self.group_real_start + 1
  62.142 +
  62.143 +    def inherit_from(self, other):
  62.144 +        """ Use other as source interval to set group_ori and group_real_start of this """
  62.145 +        if other == self.sib:
  62.146 +            self.group_real_start = other.group_real_start
  62.147 +            self.group_ori = other.group_ori
  62.148 +        else:
  62.149 +            chromosome_ori = other.ori == other.group_ori
  62.150 +            self.group_ori = self.ori == chromosome_ori
  62.151 +            delta = self.real_start - other.real_start
  62.152 +            if chromosome_ori:
  62.153 +                self.group_real_start = other.group_real_start + delta
  62.154 +            else:
  62.155 +                self.group_real_start = other.group_real_start - delta
  62.156 +
  62.157 +    def __repr__(self):
  62.158 +        if self.group_real_start != None:
  62.159 +            return "%s: [%i(%i), %i(%i))" % (self.chromosome, self.real_start,
  62.160 +                    self.group_real_start, self.real_end, self.group_real_end)
  62.161 +        else:
  62.162 +            return "%s: [%i, %i)" % (self.chromosome, self.real_start, self.real_end)
  62.163 +
  62.164 +    def __str__(self):
  62.165 +        """ returns string to put into table file. IMPORTANT: [start, end] """
  62.166 +        ori = 1 if self.ori else -1
  62.167 +        if self.group_real_start != None:
  62.168 +            group_ori = 1 if self.group_ori else -1
  62.169 +            return "%s %i %i %i %i %i %i" % (self.chromosome, self.start,
  62.170 +                    self.end-1, self.group_start, self.group_end-1, ori, group_ori)
  62.171 +        else:
  62.172 +            return "%s %i %i %i" % (self.chromosome, self.start,
  62.173 +                    self.end, ori)
  62.174 +
  62.175 +class IntervalGroup(list):
  62.176 +    """ list of intervals """
  62.177 +
  62.178 +    def __init__(self):
  62.179 +        self.start = 0
  62.180 +
  62.181 +    def calc_start(self):
  62.182 +        """ shift all group coordinates to make min position zero """
  62.183 +        offset = -min(min(interval.group_real_start, interval.group_real_end)
  62.184 +                 for interval in self)
  62.185 +        for interval in self:
  62.186 +            interval.group_real_start += offset
  62.187 +
  62.188 +class RepeatJoiner(dict):
  62.189 +    """ dictionary {<chromosome name>: chromosome}
  62.190 +
  62.191 +    Data:
  62.192 +        interval_groups -- list of interval_groups
  62.193 +        _intervals = []
  62.194 +
  62.195 +    >>> rj = RepeatJoiner()
  62.196 +    >>> r1 = Interval(rj, '1', 10, 100)
  62.197 +    >>> r2 = Interval(rj, '1', 50, 150)
  62.198 +    >>> Interval.pair(r1, r2)
  62.199 +    >>> rj.keys()
  62.200 +    ['1']
  62.201 +    >>> rj.build_groups()
  62.202 +    >>> len(set(rj.interval_groups[0]))
  62.203 +    2
  62.204 +    >>> print rj.interval_groups
  62.205 +    [[1: [10(0), 100(90)), 1: [50(0), 150(100))]]
  62.206 +
  62.207 +    >>> rj = RepeatJoiner()
  62.208 +    >>> Interval.pair(Interval(rj, '1', 10, 100), Interval(rj, '1', 110, 200))
  62.209 +    >>> Interval.pair(Interval(rj, '1', 30, 120), Interval(rj, '1', 7110, 7200))
  62.210 +    >>> rj.build_groups()
  62.211 +    >>> print rj.interval_groups
  62.212 +    [[1: [10(0), 100(90)), 1: [110(0), 200(90)), 1: [30(20), 120(110)), 1: [7110(20), 7200(110))]]
  62.213 +
  62.214 +    >>> rj = RepeatJoiner()
  62.215 +    >>> Interval.pair(Interval(rj, '1', 10, 100), Interval(rj, '1', 110, 200))
  62.216 +    >>> Interval.pair(Interval(rj, '1', 6030, 6120), Interval(rj, '1', 7110, 7200))
  62.217 +    >>> rj.build_groups()
  62.218 +    >>> print rj.interval_groups
  62.219 +    [[1: [10(0), 100(90)), 1: [110(0), 200(90))], [1: [6030(0), 6120(90)), 1: [7110(0), 7200(90))]]
  62.220 +
  62.221 +    >>> rj = RepeatJoiner()
  62.222 +    >>> Interval.pair(Interval(rj, '1', 10, 50), Interval(rj, '1', 110, 150))
  62.223 +    >>> Interval.pair(Interval(rj, '1', 140, 180), Interval(rj, '1', 1000, 1040))
  62.224 +    >>> rj.build_groups()
  62.225 +    >>> print rj.interval_groups
  62.226 +    [[1: [10(0), 50(40)), 1: [110(0), 150(40))], [1: [140(0), 180(40)), 1: [1000(0), 1040(40))]]
  62.227 +
  62.228 +    >>> rj = RepeatJoiner()
  62.229 +    >>> Interval.pair(Interval(rj, '1', 10, 100), Interval(rj, '1', 110, 200))
  62.230 +    >>> Interval.pair(Interval(rj, '1', 30, 120, ori=False), Interval(rj, '1', 7110, 7200, ori=False))
  62.231 +    >>> rj.build_groups()
  62.232 +    >>> print rj.interval_groups
  62.233 +    [[1: [10(0), 100(90)), 1: [110(0), 200(90)), 1: [119(109), 29(19)), 1: [7199(109), 7109(19))]]
  62.234 +
  62.235 +    """
  62.236 +    def __init__(self):
  62.237 +        self.interval_groups = []
  62.238 +        self._intervals = []
  62.239 +
  62.240 +    def build_groups(self):
  62.241 +        """ build interval groups """
  62.242 +        used = set()
  62.243 +        for interval in self._intervals:
  62.244 +            if interval not in used:
  62.245 +                interval.group_ori = True
  62.246 +                interval.group_real_start = interval.real_start
  62.247 +                interval_group = IntervalGroup()
  62.248 +                self.interval_groups.append(interval_group)
  62.249 +                q = Queue.Queue()
  62.250 +                q.put(interval)
  62.251 +                while not q.empty():
  62.252 +                    r = q.get()
  62.253 +                    if r not in used:
  62.254 +                        used.add(r)
  62.255 +                        interval_group.append(r)
  62.256 +                        for r1 in r.edges():
  62.257 +                            if r1 not in used:
  62.258 +                                q.put(r1)
  62.259 +                                r1.inherit_from(r)
  62.260 +                interval_group.calc_start()
  62.261 +
  62.262 +
  62.263 +if __name__ == '__main__':
  62.264 +    import doctest
  62.265 +    doctest.testmod()
    63.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    63.2 +++ b/repeats/repeats.py	Tue Jan 25 16:03:00 2011 +0300
    63.3 @@ -0,0 +1,63 @@
    63.4 +"""
    63.5 +Repeats joining tool
    63.6 +
    63.7 +Repeats input format:
    63.8 +    input_id pos_from pos_to
    63.9 +"""
   63.10 +
   63.11 +from allpy import config, alignment, block, sequence
   63.12 +Sequence = sequence.Sequence
   63.13 +Block = block.Block
   63.14 +Alignment = alignment.Alignment
   63.15 +import argparse
   63.16 +import os
   63.17 +from tempfile import NamedTemporaryFile
   63.18 +
   63.19 +r = argparse.FileType('r')
   63.20 +w = argparse.FileType('w')
   63.21 +
   63.22 +p = argparse.ArgumentParser(
   63.23 +description='Repeats joining tool',
   63.24 +formatter_class=argparse.ArgumentDefaultsHelpFormatter,
   63.25 +#~ argument_default=argparse.SUPPRESS,
   63.26 +)
   63.27 +
   63.28 +p.add_argument('-v','--version',action='version',version='%(prog)s 1.0')
   63.29 +p.add_argument('-i',help='Input fasta file with genome',metavar='FILE',type=r,required=True)
   63.30 +p.add_argument('-I',help='Input text file with repeats',metavar='FILE',type=r,required=True)
   63.31 +p.add_argument('-n',help='Fasta identifier',metavar='name',required=True)
   63.32 +p.add_argument('-r',help='Repeat identifier',metavar='name',type=int,required=True)
   63.33 +p.add_argument('-o',help='Output alignment file',metavar='FILE',type=w,required=True)
   63.34 +
   63.35 +tmp_file = None
   63.36 +
   63.37 +try:
   63.38 +    args = p.parse_args()
   63.39 +    repeat_copies_pos = []
   63.40 +    for line in p.i:
   63.41 +        line = line.strip()
   63.42 +        if not line:
   63.43 +            continue
   63.44 +        try:
   63.45 +            repeat_type, pos_from, pos_to = line.strip()
   63.46 +            pos_from = int(pos_from)
   63.47 +            pos_to = int(pos_to)
   63.48 +        except:
   63.49 +            print "Warning: wrong input line '%s'" % line
   63.50 +        if repeat_type == p.r:
   63.51 +            repeat_copies_pos.append((pos_from, pos_to))
   63.52 +    repeat_copies = []
   63.53 +    for pos_from, pos_to in repeat_copies_pos:
   63.54 +        seq = Sequence.file_slice(p.I, pos_from, pos_to, p.r
   63.55 +        repeat_copies.append(seq)
   63.56 +    alignment = Alignment.from_sequences(*repeat_copies)
   63.57 +    alignment.muscle_align()
   63.58 +    alignment.save_fasta(p.o)
   63.59 +
   63.60 +except Exception, t:
   63.61 +    print t
   63.62 +    exit()
   63.63 +
   63.64 +if tmp_file:
   63.65 +    os.unlink(tmp_file)
   63.66 +
    64.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    64.2 +++ b/repeats/test.py	Tue Jan 25 16:03:00 2011 +0300
    64.3 @@ -0,0 +1,38 @@
    64.4 +
    64.5 +import sys
    64.6 +import pprint
    64.7 +
    64.8 +from repeat_joiner import Interval, RepeatJoiner
    64.9 +
   64.10 +rj = RepeatJoiner()
   64.11 +for line in open(sys.argv[1]):
   64.12 +    line = line.strip()
   64.13 +    if line:
   64.14 +        c1, c2, from1, to1, from2, to2, ori1, ori2 = line.split()[:8]
   64.15 +        if c1 == 'DNA_1':
   64.16 +            continue # first line
   64.17 +        ori1 = True if int(ori1) == 1 else False
   64.18 +        ori2 = True if int(ori2) == 1 else False
   64.19 +        from1 = int(from1)
   64.20 +        to1 = int(to1) + 1
   64.21 +        from2 = int(from2)
   64.22 +        to2 = int(to2) + 1
   64.23 +
   64.24 +        r1 = Interval(rj, c1, from1, to1, ori1)
   64.25 +        r2 = Interval(rj, c2, from2, to2, ori2)
   64.26 +        Interval.pair(r1, r2)
   64.27 +
   64.28 +rj.build_groups()
   64.29 +rj.interval_groups.sort(key=lambda g: len(g), reverse=True)
   64.30 +
   64.31 +
   64.32 +print "group\tchr\tchr_from\tchr_to\tgroup_from\tgroup_to\tori\tgroup_ori"
   64.33 +for i, interval_group in enumerate(rj.interval_groups):
   64.34 +    interval_group.sort(key=lambda i: i.group_start)
   64.35 +    prev = set()
   64.36 +    for interval in interval_group:
   64.37 +        if interval.tuple() in prev:
   64.38 +            continue
   64.39 +        prev.add(interval.tuple())
   64.40 +        print "%i\t%s" % (i, str(interval).replace(' ', '\t'))
   64.41 +
    65.1 --- a/sandbox/bufferedcanvas.py	Mon Jan 24 21:40:10 2011 +0300
    65.2 +++ b/sandbox/bufferedcanvas.py	Tue Jan 25 16:03:00 2011 +0300
    65.3 @@ -23,9 +23,9 @@
    65.4  published by the Free Software Foundation; either version 2.1 of the
    65.5  License, or (at your option) any later version.
    65.6  
    65.7 -As a special exception, the copyright holders of this library 
    65.8 +As a special exception, the copyright holders of this library
    65.9  hereby recind Section 3 of the GNU Lesser General Public License. This
   65.10 -means that you MAY NOT apply the terms of the ordinary GNU General 
   65.11 +means that you MAY NOT apply the terms of the ordinary GNU General
   65.12  Public License instead of this License to any given copy of the
   65.13  Library. This has been done to prevent users of the Library from being
   65.14  denied access or the ability to use future improvements.
    66.1 --- a/sandbox/gtk-text.py	Mon Jan 24 21:40:10 2011 +0300
    66.2 +++ b/sandbox/gtk-text.py	Tue Jan 25 16:03:00 2011 +0300
    66.3 @@ -14,7 +14,7 @@
    66.4  
    66.5  	def __init__(self):
    66.6  		window = gtk.Window(gtk.WINDOW_TOPLEVEL)
    66.7 -		window.set_resizable(True)	
    66.8 +		window.set_resizable(True)
    66.9  		window.connect("destroy", self.close_application)
   66.10  
   66.11  		sw = gtk.ScrolledWindow()
   66.12 @@ -95,7 +95,7 @@
   66.13  
   66.14  def main():
   66.15  	gtk.main()
   66.16 -	return 0	   
   66.17 +	return 0
   66.18  
   66.19  if __name__ == "__main__":
   66.20  	TextViewExample()
    67.1 --- a/sandbox/tk-text.py	Mon Jan 24 21:40:10 2011 +0300
    67.2 +++ b/sandbox/tk-text.py	Tue Jan 25 16:03:00 2011 +0300
    67.3 @@ -26,7 +26,7 @@
    67.4  # 	for i in xrange(len(body)):
    67.5  # 		t.insert('end', body[i], 'c%d' % ids[i])
    67.6  # 	t.insert('end', '\n')
    67.7 -# 
    67.8 +#
    67.9  # for i in xrange(11):
   67.10  # 	c = i * 255 // 10
   67.11  # 	t.tag_configure('c%d' % i, background='#%02x%02x%02x' % (c,c,c))
   67.12 @@ -39,10 +39,10 @@
   67.13  # 		t.insert('end', body[i], 'l%dc%d' % (y, ids[i]))
   67.14  # 	t.insert('end', '\n')
   67.15  # 	root.update()
   67.16 -# 
   67.17 +#
   67.18  # print "text created"
   67.19  # root.update()
   67.20 -# 
   67.21 +#
   67.22  # for y in xrange(len(seqs)):
   67.23  # 	for i in xrange(11):
   67.24  # 		c = i * 255 // 10
    68.1 --- a/sandbox/ttk.py	Mon Jan 24 21:40:10 2011 +0300
    68.2 +++ b/sandbox/ttk.py	Tue Jan 25 16:03:00 2011 +0300
    68.3 @@ -78,7 +78,7 @@
    68.4      global seqs
    68.5      names.delete(0, 'end')
    68.6      sequences.delete('1.0', 'end')
    68.7 -    
    68.8 +
    68.9      filename = filedialog.askopenfilename()
   68.10      seqs = set()
   68.11      for item in open(filename).read().split('\n>'):
    69.1 --- a/sandbox/wx-textctrl.py	Mon Jan 24 21:40:10 2011 +0300
    69.2 +++ b/sandbox/wx-textctrl.py	Tue Jan 25 16:03:00 2011 +0300
    69.3 @@ -31,7 +31,7 @@
    69.4  # for i in xrange(11):
    69.5  # 	c = i * 255 // 10
    69.6  # 	attrs.append(wx.TextAttr('black', '#%02x%02x%02x' % (c,c,c)))
    69.7 -# 
    69.8 +#
    69.9  # for name, body, ids, colors in seqs:
   69.10  # 	for x in xrange(len(body)):
   69.11  # 		text.SetDefaultStyle(attrs[ids[x]])
   69.12 @@ -40,7 +40,7 @@
   69.13  
   69.14  # ## v4: output text, then colorize
   69.15  # text.WriteText("\n".join(body for name, body, ids, colors in seqs))
   69.16 -# 
   69.17 +#
   69.18  # i = 0
   69.19  # text.Freeze()
   69.20  # for name, body, ids, colors in seqs:
    70.1 --- a/sec_str/sec_str.py	Mon Jan 24 21:40:10 2011 +0300
    70.2 +++ b/sec_str/sec_str.py	Tue Jan 25 16:03:00 2011 +0300
    70.3 @@ -4,12 +4,12 @@
    70.4  
    70.5  """
    70.6  
    70.7 -from allpy.lib import config, project, block
    70.8 +from allpy import config, alignment, block
    70.9  Block = block.Block
   70.10 -Project = project.Project
   70.11 +Alignment = alignment.Alignment
   70.12  import argparse
   70.13  import sys
   70.14 -from allpy.lib.fasta import determine_long_line
   70.15 +from allpy.fasta import determine_long_line
   70.16  
   70.17  r = argparse.FileType('r')
   70.18  w = argparse.FileType('w')
   70.19 @@ -17,7 +17,7 @@
   70.20  
   70.21  p = argparse.ArgumentParser(
   70.22  description='Secondary structure mapping tool.',
   70.23 -formatter_class=argparse.ArgumentDefaultsHelpFormatter, 
   70.24 +formatter_class=argparse.ArgumentDefaultsHelpFormatter,
   70.25  #~ argument_default=argparse.SUPPRESS,
   70.26  )
   70.27  
   70.28 @@ -30,7 +30,7 @@
   70.29  tmp_file = None
   70.30  
   70.31  try:
   70.32 -    project = Project(args.i)
   70.33 +    alignment = Alignment(args.i)
   70.34  except:
   70.35      args.i.close()
   70.36      tmp_file = NamedTemporaryFile(delete=False)
   70.37 @@ -38,12 +38,11 @@
   70.38      os.system('seqret %(msf)s %(fasta)s' % \
   70.39      {'msf': args.i.name, 'fasta': tmp_file.name})
   70.40      args.i = open(tmp_file.name)
   70.41 -    project = Project(args.i)
   70.42 +    alignment = Alignment(args.i)
   70.43  args.i.seek(0)
   70.44  long_line = determine_long_line(args.i)
   70.45  
   70.46 -#~ project.pdb_auto_add()
   70.47 -block = Block(project)
   70.48 +block = Block(alignment)
   70.49  
   70.50  args.i.seek(0)
   70.51  f = args.f
   70.52 @@ -53,7 +52,7 @@
   70.53          sequence.pdb_auto_add()
   70.54          chain = sequence.pdb_chains[0]
   70.55          sequence.pdb_add_sec_str(chain)
   70.56 -        project.save_secstr(f, sequence, chain, "%s_ss" % sequence.name, long_line=long_line)
   70.57 +        alignment.save_secstr(f, sequence, chain, "%s_ss" % sequence.name, long_line=long_line)
   70.58          sequence.pdb_unload()
   70.59  f.close()
   70.60  
    71.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    71.2 +++ b/setup.py	Tue Jan 25 16:03:00 2011 +0300
    71.3 @@ -0,0 +1,8 @@
    71.4 +from distutils.core import setup
    71.5 +setup(
    71.6 +	name='allpy',
    71.7 +	version='0.1',
    71.8 +	packages=['allpy'],
    71.9 +)
   71.10 +
   71.11 +# vim: set noet:
    72.1 --- a/test/test.py	Mon Jan 24 21:40:10 2011 +0300
    72.2 +++ b/test/test.py	Tue Jan 25 16:03:00 2011 +0300
    72.3 @@ -1,9 +1,9 @@
    72.4  import sys
    72.5  
    72.6 -from allpy.lib.project import Project
    72.7 -from allpy.lib.block import Block
    72.8 +from allpy.alignment import Alignment
    72.9 +from allpy.block import Block
   72.10  
   72.11 -p = Project(open('test.fasta'))
   72.12 +p = Alignment(open('test.fasta'))
   72.13  print "alignment length: %i" % len(p)
   72.14  print "sequence: %s" % str(p.sequences[0])
   72.15  
    73.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    73.2 +++ b/test/usecase1.py	Tue Jan 25 16:03:00 2011 +0300
    73.3 @@ -0,0 +1,35 @@
    73.4 +from allpy import protein
    73.5 +
    73.6 +# Create sequences from string representation of sequence body
    73.7 +sequence_1 = protein.Sequence.from_string("mkstf", name="E2E4")
    73.8 +sequence_2 = protein.Sequence.from_string("mstkfff", description="Longer sequence")
    73.9 +
   73.10 +# Create alignment from sequences
   73.11 +alignment = protein.Alignment()
   73.12 +alignment.append_sequence(sequence_1)
   73.13 +alignment.append_sequence(sequence_2)
   73.14 +alignment.realign("muscle")
   73.15 +
   73.16 +# For each sequence, print number of gaps and non-gaps in alignment
   73.17 +for row in alignment.rows():
   73.18 +    gaps = 0
   73.19 +    monomers = 0
   73.20 +    for column in alignment.columns:
   73.21 +        if column in row:
   73.22 +            monomers += 1
   73.23 +        else:
   73.24 +            gaps += 1
   73.25 +    print "%s: %s gaps, %s non-gaps" % (row.sequence.name, gaps, monomers)
   73.26 +
   73.27 +# Print number of gaps in each column
   73.28 +gaps = []
   73.29 +for column in alignment.columns:
   73.30 +    column_gaps = 0
   73.31 +    for sequence in alignment.sequences:
   73.32 +        if sequence not in column:
   73.33 +            column_gaps += 1
   73.34 +    gaps.append(column_gaps)
   73.35 +print " ".join(map(str, gaps))
   73.36 +
   73.37 +# Write alignment to file
   73.38 +alignment.to_fasta(open("new_file.fasta", "w"))
    74.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    74.2 +++ b/test/usecase2.py	Tue Jan 25 16:03:00 2011 +0300
    74.3 @@ -0,0 +1,75 @@
    74.4 +# Fragments are in pair_repeat.fasta
    74.5 +from allpy import dna
    74.6 +width = 15
    74.7 +treshold = 14
    74.8 +
    74.9 +def my_column_mark(column, threshold):
   74.10 +    """Helper to mark column (given as dict) by identity."""
   74.11 +    count = {}
   74.12 +    for sequence, monomer in column:
   74.13 +        code = monomer.code1
   74.14 +        count[code] = count.get(code, 0) + 1
   74.15 +    for code in count:
   74.16 +        if count[code] > threshold:
   74.17 +            return "+"
   74.18 +    return "-"
   74.19 +
   74.20 +def my_pair_mark(column):
   74.21 +    """Helper to mark column of 2 sequences (given as list) by identity."""
   74.22 +    if column[0] is None or column[1] is None:
   74.23 +        return "-"
   74.24 +    if column[0].code1 == column[0].code1:   
   74.25 +        return "+"
   74.26 +    return "-"
   74.27 +
   74.28 +def find_runs(markup):
   74.29 +    """Find long positive runs.
   74.30 +    
   74.31 +    This obscure and probably broken function has nothing to do with allpy,
   74.32 +    so its presence in the example is unnecessary.
   74.33 +    """
   74.34 +    position = 0
   74.35 +    count = 0
   74.36 +    plus_positions=[]
   74.37 +    for i in range(len(markup)):
   74.38 +        position += 1
   74.39 +        if position < width :
   74.40 +            if markup[i]=="+":
   74.41 +                count += 1
   74.42 +            continue 
   74.43 +        if position > width:
   74.44 +            if markup[i-width] == "+":
   74.45 +                count -=1 
   74.46 +        if count >= treshold:
   74.47 +            plus_positions.append(position)
   74.48 +    if len(plus_positions)==0:
   74.49 +        raise Exception("No blocks in alignment")
   74.50 +
   74.51 +    blocks=[]
   74.52 +    start = plus_positions[0]-width + 1
   74.53 +    stop = plus_positions[0]
   74.54 +    for p in plus_positions[1:]:
   74.55 +        if p == stop +1:
   74.56 +            stop = p
   74.57 +            continue
   74.58 +        blocks.append((start,stop))
   74.59 +        start = p - width + 1
   74.60 +        stop = p
   74.61 +    return blocks
   74.62 +
   74.63 +def main():
   74.64 +    alignment = dna.Alignment.from_file(open("pair_repeat.fasta"))
   74.65 +    if len(alignment.sequences) != 2:
   74.66 +        raise Exception("Input must have exactly 2 sequences!")
   74.67 +    alignment.realign("needle", gap_open = 0)
   74.68 +    markup = []
   74.69 +    for column in alignment.columns_as_lists():
   74.70 +      markup.append(my_pair_mark(column))
   74.71 +    markup = alignment.map_columns(my_pair_mark)
   74.72 +    print find_runs(markup)
   74.73 +
   74.74 +try:
   74.75 +    main()
   74.76 +except Exception, e:
   74.77 +    print "An error has occured:", e
   74.78 +
    75.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    75.2 +++ b/test/usecase3.py	Tue Jan 25 16:03:00 2011 +0300
    75.3 @@ -0,0 +1,36 @@
    75.4 +from allpy import protein
    75.5 +alignment = protein.Alignment.from_file(open("aln.fasta"))
    75.6 +#conservative = [(10,20), (40,50)]
    75.7 +conservative = [(0,6),(18,37)]
    75.8 +
    75.9 +def ranges_to_markup(ranges):
   75.10 +    """Convert list of ranges to line of markup.
   75.11 +    
   75.12 +    This has nothing to do with allpy.
   75.13 +    """
   75.14 +    markup = ["-"] * len(alignment.columns)
   75.15 +    for begin, end in ranges:
   75.16 +        for i in range(begin, end+1):
   75.17 +            markup[i] = "+"
   75.18 +    return "".join(markup)
   75.19 +
   75.20 +def markup_to_blocks(markup):
   75.21 +    """Convert markup line to a bunch of blocks, one for each sequential run."""
   75.22 +    current = None
   75.23 +    blocks = {}
   75.24 +    for mark, column in zip(markup, alignment.columns):
   75.25 +        if mark != current:
   75.26 +            block = protein.Block.from_alignment(alignment, columns=[])
   75.27 +            blocks[mark] = blocks.get(mark, []) + [block]
   75.28 +        current = mark
   75.29 +        blocks[mark][-1].columns.append(column)
   75.30 +    return blocks
   75.31 +
   75.32 +def main():
   75.33 +    markup = ranges_to_markup(conservative)
   75.34 +    blocks = markup_to_blocks(markup)
   75.35 +    for block in blocks["-"]:
   75.36 +        block.flush_left()
   75.37 +    alignment.to_fasta(open("output.fasta", "w"))
   75.38 +
   75.39 +main()
    76.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    76.2 +++ b/utils/flush_left.py	Tue Jan 25 16:03:00 2011 +0300
    76.3 @@ -0,0 +1,48 @@
    76.4 +#!/usr/bin/python
    76.5 +"""Flush all monomers in given range to the left, all gaps to the right.
    76.6 +
    76.7 +All position indexes are counting from 1.
    76.8 +"""
    76.9 +import optparse
   76.10 +import sys
   76.11 +import os
   76.12 +from allpy import protein
   76.13 +
   76.14 +def main():
   76.15 +    alignment = protein.Alignment.from_file(open(options.in_file))
   76.16 +    if not options.begin:
   76.17 +        options.begin = 1
   76.18 +    if not options.end:
   76.19 +        options.end = len(alignment.columns)
   76.20 +    columns = alignment.columns[options.begin-1:options.end]
   76.21 +    block = protein.Block.from_alignment(alignment, columns=columns)
   76.22 +    block.flush_left()
   76.23 +    alignment.to_fasta(open(options.out_file, "w"))
   76.24 +    if options.msf:
   76.25 +        os.system("seqret " + options.out_file + " msf::" + options.out_file.split(".")[0] + ".msf")
   76.26 +        os.system("rm " + options.out_file)
   76.27 +
   76.28 +if __name__ == "__main__":
   76.29 +    usage = "Usage: %s [options]\n\n%s" % (sys.argv[0], __doc__.strip())
   76.30 +    parser = optparse.OptionParser(usage=usage)
   76.31 +    parser.add_option("-i", "--in-file",
   76.32 +        help="Input alignment file (in FASTA format)")
   76.33 +    parser.add_option("-o", "--out-file",
   76.34 +        help="Output file")
   76.35 +    parser.add_option("-b", "--begin", type=int,
   76.36 +        help="Position in alignment to start from")
   76.37 +    parser.add_option("-e", "--end", type=int,
   76.38 +        help="Position in alignment to end with")
   76.39 +    parser.add_option("-m", "--msf", action='store_true',
   76.40 +        help="Output in MSF format (FASTA by default)")
   76.41 +
   76.42 +    options, args = parser.parse_args()
   76.43 +
   76.44 +    if args:
   76.45 +        parser.error("We take no positional arguments.")
   76.46 +    if not options.in_file or not options.out_file:
   76.47 +        parser.error("Both -i and -o parameters must be given.")
   76.48 +
   76.49 +    main()
   76.50 +
   76.51 +# vim: set et ts=4 sts=4 sw=4:
    77.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    77.2 +++ b/utils/flush_left_vblock.py	Tue Jan 25 16:03:00 2011 +0300
    77.3 @@ -0,0 +1,48 @@
    77.4 +#!/usr/bin/python
    77.5 +"""Flush all monomers in given range to the left, all gaps to the right.
    77.6 +
    77.7 +All position indexes are counting from 1.
    77.8 +"""
    77.9 +import optparse
   77.10 +import sys
   77.11 +import os
   77.12 +from allpy import protein
   77.13 +
   77.14 +def main():
   77.15 +    alignment = protein.Alignment.from_file(open(options.in_file))
   77.16 +    if not options.begin:
   77.17 +        options.begin = 1
   77.18 +    if not options.end:
   77.19 +        options.end = len(alignment.columns)
   77.20 +    columns = alignment.columns[options.begin-1:options.end]
   77.21 +    block = protein.Block.from_alignment(alignment, columns=columns)
   77.22 +    block.flush_left()
   77.23 +    alignment.to_fasta(open(options.out_file, "w"))
   77.24 +    if options.msf:
   77.25 +        os.system("seqret " + options.out_file + " msf::" + options.out_file.split(".")[0] + ".msf")
   77.26 +        os.system("rm " + options.out_file)
   77.27 +
   77.28 +if __name__ == "__main__":
   77.29 +    usage = "Usage: %s [options]\n\n%s" % (sys.argv[0], __doc__.strip())
   77.30 +    parser = optparse.OptionParser(usage=usage)
   77.31 +    parser.add_option("-i", "--in-file",
   77.32 +        help="Input alignment file (in FASTA format)")
   77.33 +    parser.add_option("-o", "--out-file",
   77.34 +        help="Output file")
   77.35 +    parser.add_option("-b", "--begin", type=int,
   77.36 +        help="Position in alignment to start from")
   77.37 +    parser.add_option("-e", "--end", type=int,
   77.38 +        help="Position in alignment to end with")
   77.39 +    parser.add_option("-m", "--msf", action='store_true',
   77.40 +        help="Output in MSF format (FASTA by default)")
   77.41 +
   77.42 +    options, args = parser.parse_args()
   77.43 +
   77.44 +    if args:
   77.45 +        parser.error("We take no positional arguments.")
   77.46 +    if not options.in_file or not options.out_file:
   77.47 +        parser.error("Both -i and -o parameters must be given.")
   77.48 +
   77.49 +    main()
   77.50 +
   77.51 +# vim: set et ts=4 sts=4 sw=4:
    78.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    78.2 +++ b/utils/flush_left_vblocks.py	Tue Jan 25 16:03:00 2011 +0300
    78.3 @@ -0,0 +1,71 @@
    78.4 +#!/usr/bin/python
    78.5 +"""Flush all monomers in given ranges to the left, all gaps to the right.
    78.6 +
    78.7 +All position indexes are counting from 1.
    78.8 +
    78.9 +File with ranges format is as follows
   78.10 +#from to
   78.11 +10 15
   78.12 +107 121
   78.13 +etc
   78.14 +
   78.15 +Lines with "#" in 1st position are skipped
   78.16 + 
   78.17 +"""
   78.18 +import optparse
   78.19 +import sys
   78.20 +import os
   78.21 +from allpy import protein
   78.22 +
   78.23 +def main(ranges):
   78.24 +    alignment = protein.Alignment.from_file(open(options.in_file))
   78.25 +    for begin, end in ranges:
   78.26 +        columns = alignment.columns[begin-1:end]
   78.27 +        block = protein.Block.from_alignment(alignment, columns=columns)
   78.28 +        block.flush_left()
   78.29 +
   78.30 +    alignment.to_fasta(open(options.out_file, "w"))
   78.31 +    if options.msf:
   78.32 +        os.system("seqret " + options.out_file + " msf::" + options.out_file.split(".")[0] + ".msf")
   78.33 +        os.system("rm " + options.out_file)
   78.34 +    
   78.35 +def ranges():
   78.36 +    ranges = []
   78.37 +    for line_no, line in enumerate(open(options.ranges), 1):        
   78.38 +        if line.strip() == "":
   78.39 +            continue
   78.40 +        if line[0] == "#":
   78.41 +            continue
   78.42 +        try:
   78.43 +            begin, end = line.strip().split()
   78.44 +            begin = int(begin)
   78.45 +            end = int(end)
   78.46 +        except Exception:
   78.47 +            print "Warning: wrong format in line %s, ignoring" % line_no
   78.48 +            continue
   78.49 +        ranges.append( (begin, end) )
   78.50 +    return ranges      
   78.51 +
   78.52 +
   78.53 +if __name__ == "__main__":
   78.54 +    usage = "Usage: %s [options]\n\n%s" % (sys.argv[0], __doc__.strip())
   78.55 +    parser = optparse.OptionParser(usage=usage)
   78.56 +    parser.add_option("-i", "--in-file",
   78.57 +        help="Input alignment file (in FASTA format)")
   78.58 +    parser.add_option("-o", "--out-file",
   78.59 +        help="Output file")
   78.60 +    parser.add_option("-r", "--ranges",
   78.61 +        help="Input file with ranges")
   78.62 +    parser.add_option("-m", "--msf", action='store_true',
   78.63 +        help="Output in MSF format (FASTA by default)")
   78.64 +
   78.65 +    options, args = parser.parse_args()
   78.66 +
   78.67 +    if args:
   78.68 +        parser.error("We take no positional arguments.")
   78.69 +    if not options.in_file or not options.out_file:
   78.70 +        parser.error("Both -i and -o parameters must be given.")
   78.71 +
   78.72 +    main(ranges())
   78.73 +
   78.74 +# vim: set et ts=4 sts=4 sw=4: