allpy
changeset 360:2785ca30cea5
Automated merge with ssh://kodomo/allpy
author | Daniil Alexeyevsky <me.dendik@gmail.com> |
---|---|
date | Tue, 25 Jan 2011 16:03:00 +0300 |
parents | 3048f18154ef 1c06c34c4df2 |
children | ef122cabf4e8 |
files | __init__.py blocks3d/wt/config.C geometrical_core/geometrical_core.py lib/__init__.py lib/allpy_data/AAdict.py lib/allpy_data/__init__.py lib/allpy_data/amino_acids.py lib/allpy_data/blossum62.py lib/allpy_pdb.py lib/block.py lib/config.py lib/fasta.py lib/graph.py lib/monomer.py lib/project.py lib/sequence.py sec_str/sec_str.py |
diffstat | 78 files changed, 4642 insertions(+), 3324 deletions(-) [+] |
line diff
1.1 --- a/.hgignore Mon Jan 24 21:40:10 2011 +0300 1.2 +++ b/.hgignore Tue Jan 25 16:03:00 2011 +0300 1.3 @@ -1,4 +1,27 @@ 1.4 syntax: glob 1.5 + 1.6 +# Compiler-generated junk 1.7 *.pyc 1.8 build 1.9 + 1.10 +# Junk from text-editors 1.11 *.geany 1.12 +*.swp 1.13 + 1.14 +# Temporary files from sphinx 1.15 +docs/build 1.16 + 1.17 +# Temporary files from debianization 1.18 +debian/python-allpy/ 1.19 +debian/geometrical-core/ 1.20 +debian/*.substvars 1.21 +debian/*.debhelper* 1.22 +debian/*stamp* 1.23 +debian/files 1.24 +debian/control 1.25 +debian/*.log 1.26 +debian/*/*.log 1.27 +debian/tmp/ 1.28 +debian/pycompat 1.29 +*.deb 1.30 +*.cdbs-config_list
2.1 --- a/.hgtags Mon Jan 24 21:40:10 2011 +0300 2.2 +++ b/.hgtags Tue Jan 25 16:03:00 2011 +0300 2.3 @@ -1,1 +1,2 @@ 2.4 c51ef42eb5e5c2c98dac3c99271777905fb4da76 first run 2.5 +c1b67c10f8f3db62cce4b70c292a5882e91bf5b6 0.0
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/README.devel Tue Jan 25 16:03:00 2011 +0300 3.3 @@ -0,0 +1,13 @@ 3.4 +To use the library in-place, without installing, you have to add its 3.5 +directory to PYTHONPATH. 3.6 + 3.7 +E.g., type this in your shell: 3.8 + 3.9 + export PYTHONPATH=`pwd` 3.10 + 3.11 +Alternatively, you can type: 3.12 + 3.13 + . ./SETPATH 3.14 + 3.15 +which adds the right path to PYTHONPATH and also adds the 'utils' directory 3.16 +to PATH.
#!/bin/bash
# SETPATH -- source this file (". ./SETPATH") to use the library in place:
# it exports PYTHONPATH pointing at the directory holding this file and
# prepends that directory's 'utils' subdirectory to PATH.

# Print a message to stderr.
say () { echo "$@" >&2; }

# Do the actual work.  Returns 0 when the environment is (already) set up,
# non-zero when nothing could be set.
main () {
    local newPYTHONPATH

    # When sourced, $0 is the name of the calling shell (bash, -bash, sh, ...),
    # which ends in "sh"; when executed, $0 is the path of this script.
    [[ "$0" != *sh ]] && say "Usage: . $0" && return 1

    # Absolute path of the directory that contains this file.
    newPYTHONPATH="$(readlink -f "$(dirname "${BASH_SOURCE[0]}")")"

    [[ "$PYTHONPATH" = "$newPYTHONPATH" ]] && say "Already done" && return 0

    # Refuse to clobber a PYTHONPATH the user has set to something else, and
    # report it as a failure so that ". ./SETPATH && command" does not go on
    # with the wrong path.
    if [[ -n "$PYTHONPATH" ]]; then
        say "PYTHONPATH was not empty"
        return 1
    fi

    export PYTHONPATH="$newPYTHONPATH"
    export PATH="$newPYTHONPATH/utils:$PATH"

    echo "PYTHONPATH=$PYTHONPATH"
    echo "PATH=$PATH"
}

main
"""Core data model of allpy: monomers, sequences, alignments and blocks."""

import sys
import re

import util
import fasta

default_gaps = set((".", "-", "~"))
"""Set of characters to recognize as gaps when parsing alignment."""


class Monomer(object):
    """Monomer object.

    Concrete monomers are subclasses created by `_subclass`; each of them
    carries the class attributes `name`, `code1`, `code3` and `is_modified`.
    """

    type = None
    """Either of 'dna', 'rna', 'protein'."""

    by_code1 = {}
    """A mapping from 1-letter code to Monomer subclass."""

    by_code3 = {}
    """A mapping from 3-letter code to Monomer subclass."""

    by_name = {}
    """A mapping from full monomer name to Monomer subclass."""

    @classmethod
    def _subclass(cls, name='', code1='', code3='', is_modified=False):
        """Create new subclass of Monomer for given monomer type.

        The name is stripped and capitalized, both codes are upper-cased, and
        the new class is registered in the lookup tables of `cls` (and, for
        3-letter code and name, of `Monomer` itself).
        """
        class TheMonomer(cls):
            pass
        name = name.strip().capitalize()
        code1 = code1.upper()
        code3 = code3.upper()
        # Class name is the monomer name with non-word characters replaced.
        TheMonomer.__name__ = re.sub(r"[^\w]", "_", name)
        TheMonomer.name = name
        TheMonomer.code1 = code1
        TheMonomer.code3 = code3
        TheMonomer.is_modified = is_modified
        # Only unmodified monomers claim the 1-letter code; a modified one
        # must not replace the entry of the monomer it is derived from.
        if not is_modified:
            cls.by_code1[code1] = TheMonomer
        cls.by_code3[code3] = TheMonomer
        cls.by_name[name] = TheMonomer
        # We duplicate distinguished long names into Monomer itself, so that we
        # can use Monomer.from_code3 to create the relevant type of monomer.
        Monomer.by_code3[code3] = TheMonomer
        Monomer.by_name[name] = TheMonomer

    @classmethod
    def _initialize(cls, codes=None):
        """Create all relevant subclasses of Monomer.

        `codes` is an iterable of rows
        `(type, code1, is_modified, code3, name)`; rows whose type starts
        with the same letter as `cls.type` are turned into subclasses.
        """
        # NB. The table uses letters d, r, p for types,
        # while we use full words; hence, we compare by first letter
        for type_, code1, is_modified, code3, name in codes:
            if type_[0] == cls.type[0]:
                cls._subclass(name, code1, code3, is_modified)

    @classmethod
    def from_code1(cls, code1):
        """Create new monomer from 1-letter code.

        Raises KeyError if the (upper-cased) code is not registered.
        """
        return cls.by_code1[code1.upper()]()

    @classmethod
    def from_code3(cls, code3):
        """Create new monomer from 3-letter code.

        Raises KeyError if the (upper-cased) code is not registered.
        """
        return cls.by_code3[code3.upper()]()

    @classmethod
    def from_name(cls, name):
        """Create new monomer from full name.

        Raises KeyError if the (stripped, capitalized) name is not registered.
        """
        return cls.by_name[name.strip().capitalize()]()

    def __repr__(self):
        return '<Monomer %s>' % self.code3

    def __str__(self):
        """Returns one-letter code"""
        return self.code1

    def __eq__(self, other):
        """Monomers within same monomer type are compared by code1."""
        assert self.type == other.type
        return self.code1 == other.code1

    def __ne__(self, other):
        """Inverse of `__eq__`.

        Python 2 does not derive `!=` from `__eq__`; without this method
        `a != b` would compare monomers by identity.
        """
        return not self.__eq__(other)


class Sequence(list):
    """Sequence of Monomers.

    This behaves like list of monomer objects. In addition to standard list
    behaviour, Sequence has the following attributes:

    * name -- str with the name of the sequence
    * description -- str with description of the sequence
    * source -- str denoting source of the sequence

    Any of them may be empty (i.e. hold empty string)

    Class attributes:

    * monomer_type -- type of monomers in sequence, must be redefined when
      subclassing
    """

    monomer_type = Monomer

    name = ''
    description = ''
    source = ''

    @classmethod
    def from_monomers(cls, monomers=None, name=None, description=None,
                      source=None):
        """Create sequence from a list of monomer objects.

        `monomers` may be any iterable (or None for an empty sequence).
        Empty `name`, `description` or `source` leave the class-level
        default (empty string) in place.
        """
        result = cls()
        # The monomers must actually end up in the sequence; otherwise
        # from_string() would always yield an empty sequence.
        if monomers is not None:
            result.extend(monomers)
        if name:
            result.name = name
        if description:
            result.description = description
        if source:
            result.source = source
        return result

    @classmethod
    def from_string(cls, string, name='', description='', source=''):
        """Create sequences from string of one-letter codes.

        Each letter is converted with `cls.monomer_type.from_code1`.
        """
        monomer = cls.monomer_type.from_code1
        monomers = [monomer(letter) for letter in string]
        return cls.from_monomers(monomers, name, description, source)

    def __repr__(self):
        return '<Sequence %s>' % str(self)

    def __str__(self):
        """Returns sequence of one-letter codes."""
        return ''.join(monomer.code1 for monomer in self)

    def __hash__(self):
        """Hash sequence by identity."""
        return id(self)


class Alignment(object):
    """Alignment. It is a list of Columns."""

    sequence_type = Sequence
    """Type of sequences in alignment. SHOULD be redefined when subclassing."""

    sequences = None
    """Ordered list of sequences in alignment. Read, but DO NOT FIDDLE!"""

    def __init__(self):
        """Initialize empty alignment."""
        self.sequences = []
        self.columns = []

    # Alignment modification methods
    # ==============================

    def append_sequence(self, sequence):
        """Add sequence to alignment.

        Monomer number i goes into column number i; columns are created as
        needed. A sequence shorter than the alignment simply has no entry
        (i.e. a gap) in the remaining columns on the right.
        """
        self.sequences.append(sequence)
        for i, monomer in enumerate(sequence):
            self.column_at(i)[sequence] = monomer

    def append_row(self, string, name='', description='', source='',
                   gaps=default_gaps):
        """Add row from a string of one-letter codes and gaps.

        Characters contained in `gaps` are treated as gaps; every other
        character becomes a monomer of the new sequence and is placed in the
        column with the index the character has in `string`.
        """
        Sequence = self.sequence_type
        without_gaps = util.remove_each(string, gaps)
        sequence = Sequence.from_string(without_gaps, name, description, source)
        # Positions in the alignment (j) of the characters that are not gaps.
        # Enumerating them again gives the position in the sequence (i).
        positions = (j for j, char in enumerate(string) if char not in gaps)
        for i, j in enumerate(positions):
            self.column_at(j)[sequence] = sequence[i]
        self.sequences.append(sequence)

    def column_at(self, n):
        """Return column by index. Create new columns if required.

        Do NOT use this method, unless you are sure it is what you want.
        """
        for i in range(len(self.columns), n + 1):
            self.columns.append(Column())
        return self.columns[n]

    # Alignment IO methods
    # ====================

    @classmethod
    def from_file(cls, file, format='fasta', gaps=default_gaps):
        """Create new alignment from file.

        Only the 'fasta' format is supported. The `name` attribute of `file`
        (empty string if the object has none) is recorded as the source of
        every sequence.
        """
        self = cls()
        assert format == 'fasta'
        # File-like objects such as StringIO have no `name` attribute.
        source = getattr(file, 'name', '')
        for (name, description, body) in fasta.parse_file(file):
            self.append_row(body, name, description, source, gaps)
        return self

    def to_fasta(self, file):
        """Write alignment in FASTA file as sequences with gaps."""
        def char(monomer):
            if monomer:
                return monomer.code1
            return "-"
        for row in self.rows_as_lists():
            seq = row.sequence
            line = "".join(map(char, row))
            fasta.save_file(file, line, seq.name, seq.description)

    # Data access methods for alignment
    # =================================

    def rows(self):
        """Return list of rows (temporary objects) in alignment.

        Each row is a dictionary of { column : monomer }.

        For gap positions there is no key for the column in row.

        Each row has attribute `sequence` pointing to the sequence the row is
        describing.

        Modifications of row have no effect on the alignment.
        """
        # For now, the function returns a list rather than iterator.
        # It is yet to see, whether memory performance here becomes critical,
        # or is random access useful.
        rows = []
        for sequence in self.sequences:
            row = util.UserDict()
            row.sequence = sequence
            for column in self.columns:
                if sequence in column:
                    row[column] = column[sequence]
            rows.append(row)
        return rows

    def rows_as_lists(self):
        """Return list of rows (temporary objects) in alignment.

        Each row here is a list of either monomer or None (for gaps).

        Each row has attribute `sequence` pointing to the sequence of row.

        Modifications of row have no effect on the alignment.
        """
        rows = []
        for sequence in self.sequences:
            row = util.UserList()
            row.sequence = sequence
            for column in self.columns:
                row.append(column.get(sequence))
            rows.append(row)
        return rows

    def columns_as_lists(self):
        """Return list of columns (temporary objects) in alignment.

        Each column here is a list of either monomer or None (for gaps).

        Items of column are sorted in the same way as alignment.sequences.

        Modifications of column have no effect on the alignment.
        """
        columns = []
        for column in self.columns:
            col = []
            for sequence in self.sequences:
                col.append(column.get(sequence))
            columns.append(col)
        return columns


class Column(dict):
    """Column of alignment.

    Column is a dict of { sequence : monomer }.

    For sequences that have gaps in current row, given key is not present in
    the column.
    """

    def __hash__(self):
        """Return hash by identity."""
        return id(self)


class Block(Alignment):
    """Block of alignment.

    Block is intersection of a set of columns & a set of rows. Most of blocks
    look like rectangular part of alignment if you shuffle alignment rows the
    right way.
    """

    alignment = None
    """Alignment the block belongs to."""

    sequences = ()
    """List of sequences in block."""

    columns = ()
    """List of columns in block."""

    @classmethod
    def from_alignment(cls, alignment, sequences=None, columns=None):
        """Build new block from alignment.

        If sequences are not given, the block uses all sequences in alignment.

        If columns are not given, the block uses all columns in alignment.

        In both cases we use exactly the list used in alignment, thus, if new
        sequences or columns are added to alignment, the block tracks this too.
        """
        if sequences is None:
            sequences = alignment.sequences
        if columns is None:
            columns = alignment.columns
        block = cls()
        block.alignment = alignment
        block.sequences = sequences
        block.columns = columns
        return block

    def flush_left(self):
        """Move all monomers to the left, gaps to the right within block."""
        padding = [None] * len(self.columns)
        for row in self.rows_as_lists():
            sequence = row.sequence
            # Monomers first, then enough gaps to cover every column.
            flushed = [monomer for monomer in row if monomer] + padding
            for monomer, column in zip(flushed, self.columns):
                if monomer:
                    column[sequence] = monomer
                elif sequence in column:
                    del column[sequence]


# vim: set ts=4 sts=4 sw=4 et:
# Tunable parameters; per the comments below they concern geometrical core
# building and PDB file download.

delta = 2.0 # for geometrical core building
minsize = 20 # minimal size of returned cores
maxabsent = 0.15 # deprecated?

# PDB download URL template (%s is the place for the PDB code)
pdb_url = 'http://www.pdb.org/pdb/files/%s.pdb'
# NOTE(review): despite the name this looks like a file path template
# (%s presumably being the PDB code again), not a directory -- confirm.
pdb_dir = '/tmp/%s.pdb'
timeout = 10 # time in seconds for the Bron-Kerbosch algorithm


# minimal part of new atoms in a new alternative core
ac_new_atoms = 0.5

# max number of cores (including main core)
ac_count = 5

7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/allpy/data/__init__.py Tue Jan 25 16:03:00 2011 +0300 7.3 @@ -0,0 +1,3 @@ 7.4 +""" 7.5 +Module that contains various data relevant to biological sequences. 7.6 +"""
# Substitution score table, used as matrix[a][b]: a dict of dicts keyed by
# one-letter codes (20 amino acid letters plus "B", "Z", "X" and "*").
# Presumably the BLOSUM62 matrix, going by the file name (blossum62.py).
# NOTE(review): the B/Z and Z/B entries are 0 here, while the BLOSUM62 table
# distributed by NCBI lists 1 for that pair -- confirm against the source
# this table was generated from.
matrix = {
"A": {"A": 4, "R": -1, "N": -2, "D": -2, "C": 0, "Q": -1, "E": -1, "G": 0, "H": -2, "I": -1, "L": -1, "K": -1, "M": -1, "F": -2, "P": -1, "S": 1, "T": 0, "W": -3, "Y": -2, "V": 0, "B": -2, "Z": -1, "X": 0, "*": -4},
"R": {"A": -1, "R": 5, "N": 0, "D": -2, "C": -3, "Q": 1, "E": 0, "G": -2, "H": 0, "I": -3, "L": -2, "K": 2, "M": -1, "F": -3, "P": -2, "S": -1, "T": -1, "W": -3, "Y": -2, "V": -3, "B": -1, "Z": 0, "X": -1, "*": -4},
"N": {"A": -2, "R": 0, "N": 6, "D": 1, "C": -3, "Q": 0, "E": 0, "G": 0, "H": 1, "I": -3, "L": -3, "K": 0, "M": -2, "F": -3, "P": -2, "S": 1, "T": 0, "W": -4, "Y": -2, "V": -3, "B": 3, "Z": 0, "X": -1, "*": -4},
"D": {"A": -2, "R": -2, "N": 1, "D": 6, "C": -3, "Q": 0, "E": 2, "G": -1, "H": -1, "I": -3, "L": -4, "K": -1, "M": -3, "F": -3, "P": -1, "S": 0, "T": -1, "W": -4, "Y": -3, "V": -3, "B": 4, "Z": 1, "X": -1, "*": -4},
"C": {"A": 0, "R": -3, "N": -3, "D": -3, "C": 9, "Q": -3, "E": -4, "G": -3, "H": -3, "I": -1, "L": -1, "K": -3, "M": -1, "F": -2, "P": -3, "S": -1, "T": -1, "W": -2, "Y": -2, "V": -1, "B": -3, "Z": -3, "X": -2, "*": -4},
"Q": {"A": -1, "R": 1, "N": 0, "D": 0, "C": -3, "Q": 5, "E": 2, "G": -2, "H": 0, "I": -3, "L": -2, "K": 1, "M": 0, "F": -3, "P": -1, "S": 0, "T": -1, "W": -2, "Y": -1, "V": -2, "B": 0, "Z": 3, "X": -1, "*": -4},
"E": {"A": -1, "R": 0, "N": 0, "D": 2, "C": -4, "Q": 2, "E": 5, "G": -2, "H": 0, "I": -3, "L": -3, "K": 1, "M": -2, "F": -3, "P": -1, "S": 0, "T": -1, "W": -3, "Y": -2, "V": -2, "B": 1, "Z": 4, "X": -1, "*": -4},
"G": {"A": 0, "R": -2, "N": 0, "D": -1, "C": -3, "Q": -2, "E": -2, "G": 6, "H": -2, "I": -4, "L": -4, "K": -2, "M": -3, "F": -3, "P": -2, "S": 0, "T": -2, "W": -2, "Y": -3, "V": -3, "B": -1, "Z": -2, "X": -1, "*": -4},
"H": {"A": -2, "R": 0, "N": 1, "D": -1, "C": -3, "Q": 0, "E": 0, "G": -2, "H": 8, "I": -3, "L": -3, "K": -1, "M": -2, "F": -1, "P": -2, "S": -1, "T": -2, "W": -2, "Y": 2, "V": -3, "B": 0, "Z": 0, "X": -1, "*": -4},
"I": {"A": -1, "R": -3, "N": -3, "D": -3, "C": -1, "Q": -3, "E": -3, "G": -4, "H": -3, "I": 4, "L": 2, "K": -3, "M": 1, "F": 0, "P": -3, "S": -2, "T": -1, "W": -3, "Y": -1, "V": 3, "B": -3, "Z": -3, "X": -1, "*": -4},
"L": {"A": -1, "R": -2, "N": -3, "D": -4, "C": -1, "Q": -2, "E": -3, "G": -4, "H": -3, "I": 2, "L": 4, "K": -2, "M": 2, "F": 0, "P": -3, "S": -2, "T": -1, "W": -2, "Y": -1, "V": 1, "B": -4, "Z": -3, "X": -1, "*": -4},
"K": {"A": -1, "R": 2, "N": 0, "D": -1, "C": -3, "Q": 1, "E": 1, "G": -2, "H": -1, "I": -3, "L": -2, "K": 5, "M": -1, "F": -3, "P": -1, "S": 0, "T": -1, "W": -3, "Y": -2, "V": -2, "B": 0, "Z": 1, "X": -1, "*": -4},
"M": {"A": -1, "R": -1, "N": -2, "D": -3, "C": -1, "Q": 0, "E": -2, "G": -3, "H": -2, "I": 1, "L": 2, "K": -1, "M": 5, "F": 0, "P": -2, "S": -1, "T": -1, "W": -1, "Y": -1, "V": 1, "B": -3, "Z": -1, "X": -1, "*": -4},
"F": {"A": -2, "R": -3, "N": -3, "D": -3, "C": -2, "Q": -3, "E": -3, "G": -3, "H": -1, "I": 0, "L": 0, "K": -3, "M": 0, "F": 6, "P": -4, "S": -2, "T": -2, "W": 1, "Y": 3, "V": -1, "B": -3, "Z": -3, "X": -1, "*": -4},
"P": {"A": -1, "R": -2, "N": -2, "D": -1, "C": -3, "Q": -1, "E": -1, "G": -2, "H": -2, "I": -3, "L": -3, "K": -1, "M": -2, "F": -4, "P": 7, "S": -1, "T": -1, "W": -4, "Y": -3, "V": -2, "B": -2, "Z": -1, "X": -2, "*": -4},
"S": {"A": 1, "R": -1, "N": 1, "D": 0, "C": -1, "Q": 0, "E": 0, "G": 0, "H": -1, "I": -2, "L": -2, "K": 0, "M": -1, "F": -2, "P": -1, "S": 4, "T": 1, "W": -3, "Y": -2, "V": -2, "B": 0, "Z": 0, "X": 0, "*": -4},
"T": {"A": 0, "R": -1, "N": 0, "D": -1, "C": -1, "Q": -1, "E": -1, "G": -2, "H": -2, "I": -1, "L": -1, "K": -1, "M": -1, "F": -2, "P": -1, "S": 1, "T": 5, "W": -2, "Y": -2, "V": 0, "B": -1, "Z": -1, "X": 0, "*": -4},
"W": {"A": -3, "R": -3, "N": -4, "D": -4, "C": -2, "Q": -2, "E": -3, "G": -2, "H": -2, "I": -3, "L": -2, "K": -3, "M": -1, "F": 1, "P": -4, "S": -3, "T": -2, "W": 11, "Y": 2, "V": -3, "B": -4, "Z": -3, "X": -2, "*": -4},
"Y": {"A": -2, "R": -2, "N": -2, "D": -3, "C": -2, "Q": -1, "E": -2, "G": -3, "H": 2, "I": -1, "L": -1, "K": -2, "M": -1, "F": 3, "P": -3, "S": -2, "T": -2, "W": 2, "Y": 7, "V": -1, "B": -3, "Z": -2, "X": -1, "*": -4},
"V": {"A": 0, "R": -3, "N": -3, "D": -3, "C": -1, "Q": -2, "E": -2, "G": -3, "H": -3, "I": 3, "L": 1, "K": -2, "M": 1, "F": -1, "P": -2, "S": -2, "T": 0, "W": -3, "Y": -1, "V": 4, "B": -3, "Z": -2, "X": -1, "*": -4},
"B": {"A": -2, "R": -1, "N": 3, "D": 4, "C": -3, "Q": 0, "E": 1, "G": -1, "H": 0, "I": -3, "L": -4, "K": 0, "M": -3, "F": -3, "P": -2, "S": 0, "T": -1, "W": -4, "Y": -3, "V": -3, "B": 4, "Z": 0, "X": -1, "*": -4},
"Z": {"A": -1, "R": 0, "N": 0, "D": 1, "C": -3, "Q": 3, "E": 4, "G": -2, "H": 0, "I": -3, "L": -3, "K": 1, "M": -1, "F": -3, "P": -1, "S": 0, "T": -1, "W": -3, "Y": -2, "V": -2, "B": 0, "Z": 4, "X": -1, "*": -4},
"X": {"A": 0, "R": -1, "N": -1, "D": -1, "C": -2, "Q": -1, "E": -1, "G": -1, "H": -1, "I": -1, "L": -1, "K": -1, "M": -1, "F": -1, "P": -2, "S": 0, "T": 0, "W": -2, "Y": -1, "V": -1, "B": -1, "Z": -1, "X": -1, "*": -4},
"*": {"A": -4, "R": -4, "N": -4, "D": -4, "C": -4, "Q": -4, "E": -4, "G": -4, "H": -4, "I": -4, "L": -4, "K": -4, "M": -4, "F": -4, "P": -4, "S": -4, "T": -4, "W": -4, "Y": -4, "V": -4, "B": -4, "Z": -4, "X": -4, "*": 1}
}

# NOTE(review): presumably gap penalties to be used together with `matrix`;
# what each of the four values stands for is not visible here -- confirm
# against the code that reads `gaps`.
gaps = (-8, -4, -2, -1)
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/allpy/data/codes.py Tue Jan 25 16:03:00 2011 +0300 9.3 @@ -0,0 +1,1209 @@ 9.4 +"""Tables of monomer codes. 9.5 + 9.6 +`dna`, `rna`, `protein` are lists of all known codes for monomers of given 9.7 +type. Each of them is a list of tuples of kind: 9.8 + 9.9 + ( 1-letter code, is-modified?, 3-letter-code, fullname ) 9.10 + 9.11 +`3-letter-code` is the code used in PDB (it may actually be one or 9.12 +two letters) 9.13 + 9.14 +""" 9.15 + 9.16 +dna = ( 9.17 +('A', False, "DA", "2'-DEOXYADENOSINE-5'-MONOPHOSPHATE"), 9.18 +('C', False, "DC", "2'-DEOXYCYTIDINE-5'-MONOPHOSPHATE"), 9.19 +('G', False, "DG", "2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"), 9.20 +('T', False, "DT", "THYMIDINE-5'-MONOPHOSPHATE"), 9.21 +('N', False, "", "Undefined DNA Nucleotide"), 9.22 +('a', True, "0AM", "2'-deoxy-N-[3-(propyldisulfanyl)propyl]adenosine 5'-(dihydrogen phosphate)"), 9.23 +('a', True, "0AV", "2'-O-methyladenosine 5'-(dihydrogen phosphate)"), 9.24 +('a', True, "0SP", "2'-deoxy-N-[3-(propyldisulfanyl)propyl]adenosine 5'-(dihydrogen phosphate)"), 9.25 +('a', True, "1AP", "2,6-DIAMINOPURINE"), 9.26 +('a', True, "2AR", "2'-DEOXYARISTEROMYCIN-5'-PHOSPHATE"), 9.27 +('a', True, "2BU", "(2S,3S)-N6-(2,3,4-TRIHYDROXYBUTYL)-2'-DEOXYADENOSINE MONO PHOSPHORIC ACID"), 9.28 +('a', True, "2DA", "2',3'-DIDEOXYADENOSINE-5'-MONOPHOSPHATE"), 9.29 +('a', True, "3DA", "3'-DEOXYADENOSINE-5'-MONOPHOSPHATE"), 9.30 +('a', True, "5AA", "N6-DIMETHYL-3'-AMINO-ADENOSINE-5'-MONOPHOSPHATE"), 9.31 +('a', True, "6HA", "1',5'-ANHYDRO-2',3'-DIDEOXY-2'-(ADENIN-9-YL)-6'-O-PHOSPHORYL-D-ARABINO-HEXITOL"), 9.32 +('a', True, "7DA", "7-DEAZA-2'-DEOXYADENOSINE-5'-MONOPHOSPHATE"), 9.33 +('a', True, "A34", "N6-METHYL DEOXYADENOSINE 5'-MONOPHOSPHATE"), 9.34 +('a', True, "A35", "2-AMINO DEOXYADENOSINE 5'-MONOPHOSPHATE"), 9.35 +('a', True, "A38", "8-OXY DEOXYADENOSINE-5'-MONOPHOSPHATE"), 9.36 +('a', True, "A3A", "2'DEOXY-ALPHA-ANOMERIC-ADENOSINE-5'-PHOSPHATE"), 9.37 +('a', True, 
"A40", "N2-METHYL 2'-DEOXYADENOSINE 5'-MONOPHOSPHATE"), 9.38 +('a', True, "A43", "3'-AMINO DEOXYADENOSINE 5'-MONOPHOSPHATE"), 9.39 +('a', True, "A47", "N6-METHOXY ADENOSINE 5'-MONOPHOSPHATE"), 9.40 +('a', True, "ABR", "(R)-(N-PHENYL-2-HYDROXY-ETHYL)-2'-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"), 9.41 +('a', True, "ABS", "(S)-(N-PHENYL-2-HYDROXY-ETHYL)-2'-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"), 9.42 +('a', True, "AD2", "2'-DEOXY-ADENOSINE-3'-5'-DIPHOSPHATE"), 9.43 +('a', True, "AP7", "N1-PROTONATED ADENOSINE-5'-MONOPHOSPHATE"), 9.44 +('a', True, "APN", "2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-ADENINE"), 9.45 +('a', True, "AS", "2-DEOXY-ADENOSINE -5'-THIO-MONOPHOSPHATE"), 9.46 +('a', True, "E", "N-((-)-(7S,8R,9S,10R)[7,8,9-TRIHYDROXY-7,8,9,10-TETRA HYDROBENZO[A]PYREN-10-YL])-2'-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"), 9.47 +('a', True, "E1X", "PHOSPHORIC"), 9.48 +('a', True, "EDA", "3-[2-DEOXY-RIBOFURANOSYL]-3H-1,3,4,5A,8-PENTAAZA-AS-INDACENE-5'-MONOPHOSPHATE"), 9.49 +('a', True, "FA2", "5-(6-AMINO-9H-PURIN-9-YL)-4-HYDROXYTETRAHYDROFURAN-3-YL"), 9.50 +('a', True, "MA7", "1N-METHYLADENOSINE-5'-MONOPHOSPHATE"), 9.51 +('a', True, "PRN", "PURINE 2'-DEOXYRIBO-5'-MONOPHOSPHATE"), 9.52 +('a', True, "R", "2'-DEOXY-N6-(R)STYRENE OXIDE ADENOSINE MONOPHOSPHATE"), 9.53 +('a', True, "RMP", "2'-DEOXY-ADENOSINE-5'-RP-MONOMETHYLPHOSPHONATE"), 9.54 +('a', True, "S4A", "2'-deoxy-4'-thioadenosine 5'-(dihydrogen phosphate)"), 9.55 +('a', True, "SMP", "2'-DEOXY-ADENOSINE-5'-SP-MONOMETHYLPHOSPHONATE"), 9.56 +('a', True, "TCY", "(2R,3AS,4AR,5AR,5BS)-2-(6-AMINO-9H-PURIN-9-YL)-3A-HYDROXYHEXAHYDROCYCLOPROPA[4,5]CYCLOPENTA[1,2-B]FURAN-5A(4H)-YL"), 9.57 +('a', True, "TFO", "[2-(6-AMINO-9H-PURIN-9-YL)-1-METHYLETHOXY]METHYLPHOSPHONIC"), 9.58 +('a', True, "XAD", "9-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-9H-PURIN-6-AMINE"), 9.59 +('a', True, "XAL", "[(1S,4R,6R)-6-HYDROXY-4-(ADENIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL"), 9.60 +('a', True, "Y", "2'-DEOXY-N6-(S)STYRENE OXIDE ADENOSINE MONOPHOSPHATE"), 9.61 
+('c', True, "0AP", "2'-deoxycytidine 3',5'-bis(dihydrogen phosphate)"), 9.62 +('c', True, "4PC", "3-(2'-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-3,7-DIHYDRO-2H-PYRROLO[2,3-D]PYRIMIDIN-2-ONE"), 9.63 +('c', True, "4PD", "3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-1,3-DIHYDRO-2H-PYRROLO[2,3-D]PYRIMIDIN-2-ONE"), 9.64 +('c', True, "4PE", "3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-3H-PYRROLO[2,3-D]PYRIMIDIN-2-OL"), 9.65 +('c', True, "4SC", "4'-THIO-2'-DEOXYCYTIDINE-5'-MONOPHOSPHATE GROUP"), 9.66 +('c', True, "5CM", "5-METHYL-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.67 +('c', True, "5FC", "5-FORMYL-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.68 +('c', True, "5NC", "5-AZA-CYTIDINE-5'MONOPHOSPHATE"), 9.69 +('c', True, "5PC", "5(1-PROPYNYL)-2'-DEOXYCYTIDINE-5'-MONOPHOSPHATE"), 9.70 +('c', True, "6HC", "1',5'-ANHYDRO-2',3'-DIDEOXY-2'-(CYTOSIN-1-YL)-6'-O-PHOSPHORYL-D-ARABINO-HEXITOL"), 9.71 +('c', True, "C2S", "CYTIDINE-5'-DITHIOPHOSPHORATE"), 9.72 +('c', True, "C32", "5-BROMO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.73 +('c', True, "C34", "N4-METHYL-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.74 +('c', True, "C36", "5-METHYL-5-FLUORO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.75 +('c', True, "C37", "5-FLUORO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.76 +('c', True, "C38", "5-IODO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.77 +('c', True, "C42", "3'-AMINO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.78 +('c', True, "C45", "N4-METHOXY-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.79 +('c', True, "C46", "6H,8H-3,4-DIHYDROPYRIMIDO[4,5-C][1,2]OXAZIN-7-0NE(CYTIDINE)-5'-MONOPHOSPHATE"), 9.80 +('c', True, "C49", "4-THIO,5-FLUORO,5-METHYL-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.81 +('c', True, "C4S", "2'-deoxy-4'-thiocytidine 5'-(dihydrogen phosphate)"), 9.82 +('c', True, "CAR", "CYTOSINE ARABINOSE-5'-PHOSPHATE"), 9.83 +('c', True, "CB2", "PHOSPHORIC"), 9.84 +('c', True, "CBR", "5-BROMO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.85 +('c', True, "CFL", 
"4-AMINO-1-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)PYRIMIDIN-2(1H)-ONE"), 9.86 +('c', True, "CMR", "2'-DEOXY-CYTIDINE-5'-RP-MONOMETHYLPHOSPHONATE"), 9.87 +('c', True, "CP1", "2-(METHYLAMINO)-ETHYLGLYCINE-CARBONYLMETHYLENE-CYTOSINE"), 9.88 +('c', True, "CPN", "2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-CYTOSINE"), 9.89 +('c', True, "CSL", "(D)-2'-METHYLSELENYL-2'-DEOXYCYTIDINE-5'-PHOSPHATE"), 9.90 +('c', True, "DCT", "2',3'-DIDEOXYCYTIDINE 5'-TRIPHOSPHATE"), 9.91 +('c', True, "DFC", "2'-DEOXY-L-RIBO-FURANOSYL CYTOSINE-5'-MONOPHOSPHATE"), 9.92 +('c', True, "DNR", "2'-DEOXY-N3-PROTONATED CYTIDINE-5'-MONOPHOSPHATE"), 9.93 +('c', True, "DOC", "2',3'-DIDEOXYCYTIDINE-5'-MONOPHOSPHATE"), 9.94 +('c', True, "GCK", "PHOSPHORIC ACID 9-(2-GUANIDINOETHOXY-3-(2-DEOXY-BETA-D-ERYTHROPENTOFURANOSYL))-3H-PYRIMIDO-[5,4-B][1,4]-BENZOOXAZIN-2-ONE]-5'-ESTER"), 9.95 +('c', True, "I5C", "5-IODO-2'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.96 +('c', True, "IMC", "N1-[2-DEOXY-RIBOFURANOSYL]-[2-AMINO-5-METHYL-4-OXO-4H-PYRIMIDINE]-5'-MONOPHOSPHATE"), 9.97 +('c', True, "MCY", "5-METHYL-2'-DEOXYCYTIDINE"), 9.98 +('c', True, "SC", "2-DEOXY-CYTIDINE-5'-THIOPHOSPHORATE"), 9.99 +('c', True, "TC1", "3-(5-PHOSPHO-2-DEOXY-BETA-D-RIBOFURANOSYL)-2-OXO-1,3-DIAZA-PHENOTHIAZINE"), 9.100 +('c', True, "TPC", "5'-THIO-2'-DEOXY-CYTOSINE PHOSPHONIC ACID"), 9.101 +('c', True, "XCL", "[(1S,4R,6R)-6-HYDROXY-4-(CYTOSIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL"), 9.102 +('c', True, "XCT", "4-AMINO-1-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)PYRIMIDIN-2(1H)-ONE"), 9.103 +('c', True, "XCY", "{5-[4-{[4-(AMINOMETHYL)BENZYL]AMINO}-2-OXOPYRIMIDIN-1(2H)-"), 9.104 +('c', True, "Z", "ZEBULARINE"), 9.105 +('g', True, "0AD", "2'-deoxy-N-propylguanosine 5'-(dihydrogen phosphate)"), 9.106 +('g', True, "2EG", "2'-DEOXY-N-ETHYLGUANOSINE 5'-PHOSPHATE"), 9.107 +('g', True, "2PR", "2-AMINO-9-[2-DEOXYRIBOFURANOSYL]-9H-PURINE-5'-MONOPHOSPHATE"), 9.108 +('g', True, "5CG", "5'-CHLORO-5'-DEOXY-GUANOSINE"), 9.109 +('g', True, 
"6HG", "1',5'-ANHYDRO-2',3'-DIDEOXY-2'-(GUANIN-9-YL)-6'-O-PHOSPHORYL-D-ARABINO-HEXITOL"), 9.110 +('g', True, "6OG", "6-O-METHYL GUANOSINE-5'-MONOPHOSPHATE"), 9.111 +('g', True, "7GU", "7-DEAZA-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"), 9.112 +('g', True, "8FG", "N-(5'-PHOSPHO-2'-DEOXYGUANOSIN-8-YL)-2-ACETYLAMINOFLUORENE"), 9.113 +('g', True, "8MG", "8-METHYL-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"), 9.114 +('g', True, "8OG", "8-OXO-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"), 9.115 +('g', True, "AFG", "N-(5'-PHOSPHO-2'-DEOXYGUANOSIN-8-YL)-2-AMINOFLUORENE"), 9.116 +('g', True, "BGM", "8-BROMO-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"), 9.117 +('g', True, "DCG", "2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"), 9.118 +('g', True, "DDG", "2',3'-DIDEOXY-GUANOSINE-5'-MONOPHOSPHATE"), 9.119 +('g', True, "DFG", "2'-DEOXY-L-RIBO-FURANOSYL GUANINE-5'-MONOPHOSPHATE"), 9.120 +('g', True, "DGI", "2'-DEOXYGUANOSINE-5'-DIPHOSPHATE"), 9.121 +('g', True, "EDC", "N3,N4-ETHENO-2'-DEOXYCYTIDINE-5'-MONOPHOSPHATE"), 9.122 +('g', True, "FMG", "2-AMINO-9-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)-7-METHYL-6-OXO-6,9-DIHYDRO-1H-PURIN-7-IUM"), 9.123 +('g', True, "FOX", "((1R,2S,4R)-4-{[2-AMINO-5-(FORMYLAMINO)-6-OXO-3,6-DIHYDROPYRIMIDIN-4-YL]AMINO}-2-HYDROXYCYCLOPENTYL)METHYL 5'-PHOSPHATE"), 9.124 +('g', True, "G2S", "GUANOSINE-5'-DITHIOPHOSPHORATE"), 9.125 +('g', True, "G31", "3'-METHYL-2',3'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"), 9.126 +('g', True, "G32", "O6-METHYL-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"), 9.127 +('g', True, "G33", "8-METHYL-2'-DEOXYGUANOSINE 3'-MONOPHOSPHATE"), 9.128 +('g', True, "G36", "O6-ETHYL-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"), 9.129 +('g', True, "G38", "3'-AMINO-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"), 9.130 +('g', True, "G42", "8-OXO-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"), 9.131 +('g', True, "G47", "N2-ETHANETHIOL-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"), 9.132 +('g', True, "G49", "N2-METHYL-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"), 9.133 +('g', True, "GDR", 
"GUANOSINE-5'-DIPHOSPHATE-RHAMNOSE"), 9.134 +('g', True, "GFL", "2-AMINO-9-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)-1,9-DIHYDRO-6H-PURIN-6-ONE"), 9.135 +('g', True, "GMS", "2'-DEOXYGUANOSINE-5'-MONOSELENOPHOSPHATE"), 9.136 +('g', True, "GN7", "N7-2'-DEOXY-GUANOSINE-5'-MONOPHOSPHATE"), 9.137 +('g', True, "GPN", "2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-GUANINE"), 9.138 +('g', True, "GS", "GUANOSINE-5'-THIO-MONOPHOSPHATE"), 9.139 +('g', True, "GSR", "2'-DEOXY-N2-(R)STYRENE OXIDE GUANOSINE MONOPHOSPHATE"), 9.140 +('g', True, "GSS", "2'-DEOXY-N2-(S)STYRENE OXIDE GUANOSINE MONOPHOSPHATE"), 9.141 +('g', True, "IGU", "2'-DEOXYISOGUANINE-5'-MONOPHOSPHATE"), 9.142 +('g', True, "LCG", "[(1R,3R,4R,7S)-7-HYDROXY-3-(GUANIN-9-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"), 9.143 +('g', True, "LGP", "N9-1-HYDROXY-PROP-2-OXYMETHYL-GUANINE-3'-MONOPHOSPHATE"), 9.144 +('g', True, "M1G", "3-(2-DEOXY-BETA-D-RIBOFURANOSYL)-PYRIDO[5,6-A]-PURINE-10-ONE-5'-MONOPHOSPHATE"), 9.145 +('g', True, "MG1", "2'-DEOXY-1-METHYLGUANOSINE 5'-(DIHYDROGEN PHOSPHATE)"), 9.146 +('g', True, "MRG", "N2-(3-MERCAPTOPROPYL)-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"), 9.147 +('g', True, "P", "2'-DEOXY-N1,N2-PROPANO GUANOSINE MONOPHOSPHATE"), 9.148 +('g', True, "PG7", "((2R,3R,5R)-5-(2-AMINO-6-HYDROXY-9H-PURIN-9-YL)-3-HYDROXY-TETRAHYDROFURAN-2-YL)METHYL"), 9.149 +('g', True, "PGN", "2'-DEOXYGUANOSINE-3',5'-DIPHOSPHATE"), 9.150 +('g', True, "PPW", "7-DEAZA-8-AZA-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"), 9.151 +('g', True, "S4G", "2'-deoxy-4'-thioguanosine 5'-(dihydrogen phosphate)"), 9.152 +('g', True, "S6G", "6-THIO-2'-DEOXYGUANOSINE-5'-MONOPHOSPHATE"), 9.153 +('g', True, "SDG", "2-AMINO-9-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-9H-PURINE-6-SELENOL"), 9.154 +('g', True, "TGP", "5'-THIO-2'-DEOXY-GUANOSINE PHOSPHONIC ACID"), 9.155 +('g', True, "X", "2'-DEOXY-N7-(8,9-DIHYDRO-9-HYDROXY-10-DEHYDROXY-AFLATOXIN)GUANOSINE MONOPHOSPHATE"), 9.156 +('g', True, "XGL", 
"[(1S,4R,6R)-6-HYDROXY-4-(GUANIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL"), 9.157 +('g', True, "XGU", "2-AMINO-9-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-1,9-DIHYDRO-6H-PURIN-6-ONE"), 9.158 +('g', True, "XUG", "2'-SE-METHYL-2'-SELENOGUANOSINE 5'-(DIHYDROGEN PHOSPHATE)"), 9.159 +('n', True, "0AU", "5-iodouridine 5'-(dihydrogen phosphate)"), 9.160 +('n', True, "2BD", "N1-(1-HYDROXY-3-BUTEN-2(S)-YL)-2'-DEOXYINOSINE MONO PHOSPHORIC ACID"), 9.161 +('n', True, "2DF", "N-(2-DEOXY-BETA-D-ERYTHO-PENTOFURANOSYL-5-PHOSPHATE)"), 9.162 +('n', True, "2DM", "2-HYDROXY-3-(PYREN-1-YLMETHOXY)PROPYL"), 9.163 +('n', True, "2FE", "2'-FLUORO-2'-DEOXY-1,N6-ETHENOADENINE"), 9.164 +('n', True, "2FI", "2'-FLUORO-2'-DEOXYINOSINE"), 9.165 +('n', True, "3DR", "1',2'-DIDEOXYRIBOFURANOSE-5'-PHOSPHATE"), 9.166 +('n', True, "3ME", "PHOSPHORIC"), 9.167 +('n', True, "4MF", "1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-4-METHYL-1H-INDOLE"), 9.168 +('n', True, "5HU", "5-HYDROXYMETHYLURIDINE-2'-DEOXY-5'-MONOPHOSPHATE"), 9.169 +('n', True, "5IU", "5-IODO-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"), 9.170 +('n', True, "5MD", "5-METHYL-2'-DEOXYPSEUDOURIDINE"), 9.171 +('n', True, "6MI", "6-METHYL-8-(2-DEOXY-RIBOFURANOSYL)ISOXANTHOPTERIDINE"), 9.172 +('n', True, "A1P", "9-{2-DEOXY-5-O-[HYDROXY(OXIDO)PHOSPHINO]-BETA-L-ERYTHRO-PENTOFURANOSYL}-9H-PURIN-2-AMINE"), 9.173 +('n', True, "ABT", "3'-AZIDO-3'-DEOXY-THYMIDINE-5'-ALPHA BORANO TRIPHOSPHATE"), 9.174 +('n', True, "AFF", "2-ACETYLAMINOFLUORENE-3-YL"), 9.175 +('n', True, "ASU", "4'-THIO-2'4'-DIDEOXYRIBOFURANOSE-5'-PHOSPHATE"), 9.176 +('n', True, "B1P", "2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSE"), 9.177 +('n', True, "BRU", "5-BROMO-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"), 9.178 +('n', True, "BVP", "(E)-5-(2-BROMOVINYL)-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"), 9.179 +('n', True, "BZG", "6-(BENZYLOXY)-9-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-9H-PURIN-2-AMINE"), 9.180 +('n', True, "D1P", "2'-DEOXY-RIBOFURANOSE-5'-PHOSPHATE"), 9.181 +('n', 
True, "D3", "1-(2-DEOXY-BETA-D-RIBOFURANOSYL)-4-(3-BENZAMIDO)PHENYLIMIDAZOLE"), 9.182 +('n', True, "DDN", "3,4-DIHYDRO-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"), 9.183 +('n', True, "DDX", "2',3'-DEHYDRO-2',3'-DIDEOXYRIBOFURANOSE-5'-PHOSPHATE"), 9.184 +('n', True, "DFT", "1-[2-DEOXYRIBOFURANOSYL]-2,4-DIFLUORO-5-METHYL-BENZENE-5'MONOPHOSPHATE"), 9.185 +('n', True, "DI", "2'-DEOXYINOSINE-5'-MONOPHOSPHATE"), 9.186 +('n', True, "DPY", "2-DEOXYRIBOFURANOSYL-PYRIDINE-2,6-DICARBOXYLIC ACID-5'-MONOPHOSPHATE"), 9.187 +('n', True, "DRM", "{[(1R,2S)-2-(2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)CYCLOPENTYL]OXY}METHYLPHOSPHONIC"), 9.188 +('n', True, "DRP", "2-DEOXYRIBOFURANOSYL-PYRIDINE-5'-MONOPHOSPHATE"), 9.189 +('n', True, "DRZ", "3',4'-DIHYDROXY-PENTANAL-5'-PHOSPHATE"), 9.190 +('n', True, "DU", "2'-DEOXYURIDINE-5'-MONOPHOSPHATE"), 9.191 +('n', True, "DXD", "(1S,3S,4R)-4-(PHOSPHOOXYMETHYL)-CYCLOPENTANE-1,3-DIOL"), 9.192 +('n', True, "DXN", "(1R,3S,4R)-4-(PHOSPHOOXYMETHYL)-CYCLOPENTANE-1,3-DIOL"), 9.193 +('n', True, "FAG", "[1',2'-DIDEOXY[2-AMINO-5-([9-HYDROXY-AFLATOXINB2-8-YL]-FORMYL-AMINO)-6-OXO-1,6-IHYDRO-PYRIMIDIN-4-YLAMINO]-RIBOFURANOSE]-5-MONOPHOSPHATE GROUP"), 9.194 +('n', True, "FFD", "(1R)-1,4-ANHYDRO-2-DEOXY-1-(3-FLUOROPHENYL)-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL"), 9.195 +('n', True, "GMU", "2'-O-[(2-GUANIDINIUM)ETHYL]-5-METHYLURIDINE 5'-MONOPHOSPHATE"), 9.196 +('n', True, "GNE", "1,N2-ETHENOGUANINE"), 9.197 +('n', True, "HDP", "[(1S,6S)-6-HYDROXY-4-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)CYCLOHEX-2-EN-1-YL]METHYL"), 9.198 +('n', True, "HEU", "3-(2-HYDROXYETHYL)-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"), 9.199 +('n', True, "HOB", "CHOLEST-5-EN-3-YL"), 9.200 +('n', True, "HOL", "CHOLEST-5-EN-3-YL"), 9.201 +('n', True, "IPN", "2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-5-IODOURACIL"), 9.202 +('n', True, "LCC", "[(1R,3R,4R,7S)-7-HYDROXY-3-(5-METHYLCYTOSIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"), 9.203 +('n', True, "LCH", 
"[(1R,3R,4R,7S)-7-HYDROXY-3-(5-METHYLCYTOSIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"), 9.204 +('n', True, "LKC", "4-AMINO-1-[(1S,3R,4R,7S)-7-HYDROXY-1-(HYDROXYMETHYL)-2,5-DIOXABICYCLO[2.2.1]HEPT-3-YL]-5-METHYLPYRIMIDIN-2(1H)-ONE"), 9.205 +('n', True, "MBZ", "1-[2-DEOXYRIBOFURANOSYL]-4-METHYL-BENZOIMIDAZOLE-5'-MONOPHOSPHATE"), 9.206 +('n', True, "MDR", "9-(2-DEOXY-BETA-D-RIBOFURANOSYL)-6-METHYLPURINE"), 9.207 +('n', True, "N5I", "1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-NITRO-1H-INDOLE"), 9.208 +('n', True, "NCX", "1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-NITRO-1H-INDOLE-3-CARBOXAMIDE"), 9.209 +('n', True, "NDN", "2'-DEOXY-5-NITROURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 9.210 +('n', True, "NP3", "1-[2-DEOXY-RIBOFURANOSYL]-1H-[3-NITRO-PYRROL]-5'-PHOSPHATE"), 9.211 +('n', True, "NYM", "3'-DEOXY-3'-AMINOTHYMIDINE MONOPHOSPHATE"), 9.212 +('n', True, "O2C", "3'-DEOXY-CYTIDINE-5'-MONOPHOSPHATE"), 9.213 +('n', True, "OIP", "2'-DEOXY-INOSINIC ACID"), 9.214 +('n', True, "P2U", "2'-DEOXY-PSEUDOURIDINE-5'MONOPHOSPHATE"), 9.215 +('n', True, "PBT", "[3-HYDROXY-5-(5-METHYL-2,4-DIOXOTETRAHYDRO-1(2H)-PYRIMIDINYL)TETRAHYDRO-2-FURANYL]METHYL"), 9.216 +('n', True, "PDU", "5(1-PROPYNYL)-2'-DEOXYURIDINE-5-MONOPHOSPHATE"), 9.217 +('n', True, "T", "THYMIDINE-5'-MONOPHOSPHATE"), 9.218 +('n', True, "T2T", "[(2S,3S,5R)-3-[(2S)-3-({[(2R,3S,4R,5R)-3-HYDROXY-4-METHOXY-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)TETRAHYDROFURAN-2-YL]METHYL}AMINO)-2-METHYL-3-OXOPROPYL]-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)TETRAHYDROFURAN-2-YL]METHYL DIHYDROGEN PHOSPHATE"), 9.219 +('n', True, "THX", "PHOSPHONIC ACID 6-({6-[6-(6-CARBAMOYL-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONYL)-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDOCENE-2-CARBONYL]-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONL}-AMINO)-HEXYL ESTER 5-(5-METHYL-2,4-DIOXO-3,4-DIHYDRO-2H-PYRIMIDIN-1-YL)-TETRAHYDRO-FURAN-2-YLMETHYL ESTER"), 9.220 +('n', True, "TLN", 
"[(1R,3R,4R,7S)-7-HYDROXY-3-(THYMIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"), 9.221 +('n', True, "TS", "THYMIDINE-5'-THIOPHOSPHATE"), 9.222 +('n', True, "TT", "[(1R,3R,4S,9R,10S,12R,15AS,15BR,18BR,18CS)-10-HYDROXY-15A,15B-DIMETHYL-13,15,16,18-TETRAOXOHEXADECAHYDRO-8H-9,12-EPOXY-1,4-METHANO-2,5,7-TRIOXA-12A,14,17,18A-TETRAAZACYCLOHEXADECA[1,2,3,4-DEF]BIPHENYLEN-3-YL]METHYL DIHYDROGEN PHOSPHATE"), 9.223 +('n', True, "U2N", "2'-AMINO-2'-DEOXYURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 9.224 +('n', True, "U33", "5-BROMO-2'-DEOXY URIDINE"), 9.225 +('n', True, "UCL", "5-CHLORO-2'-DEOXYURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 9.226 +('n', True, "UFP", "5-FLUORO-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"), 9.227 +('n', True, "UFR", "2'-DEOXY-5-FORMYLURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 9.228 +('n', True, "UFT", "2'-deoxy-2'-fluorouridine 5'-(dihydrogen phosphate)"), 9.229 +('n', True, "UMS", "2'-METHYLSELENYL-2'-DEOXYURIDINE-5'-PHOSPHATE"), 9.230 +('n', True, "US1", "2'-DEOXY-3'-THIOURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 9.231 +('n', True, "X4A", "[(2R,3S,5S)-2,3,5-TRIHYDROXYTETRAHYDROFURAN-2-YL]METHYL"), 9.232 +('n', True, "XAE", "3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-3H-IMIDAZO[4,5-G]QUINAZOLIN-8-AMINE"), 9.233 +('n', True, "XAR", "[(1R,4S,6S)-4-(6-AMINO-9H-PURIN-9-YL)-6-HYDROXYCYCLOHEX-2-EN-1-YL]METHYL"), 9.234 +('n', True, "XCS", "(1R)-1-(4-AMINO-6-METHYL-2-OXO-1,2-DIHYDROQUINAZOLIN-8-YL)-1,4-ANHYDRO-2-DEOXY-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL"), 9.235 +('n', True, "XGA", "6-AMINO-3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-3,7-DIHYDRO-8H-IMIDAZO[4,5-G]QUINAZOLIN-8-ONE"), 9.236 +('n', True, "XTY", "(1R)-1,4-ANHYDRO-2-DEOXY-1-(6-METHYL-2,4-DIOXO-1,2,3,4-TETRAHYDROQUINAZOLIN-8-YL)-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL"), 9.237 +('n', True, "YRR", "3-HYDROXY-PYRROLIDIN-2-YLMETHYL-MONOPHOSPHATE"), 9.238 +('n', True, "ZDU", "5-(3-AMINOPROPYL)-2'-DEOXYURIDINE-5'-MONOPHOSPHATE"), 9.239 +('t', True, "2AT", "2'-O-ALLYL THYMIDINE-5'-MONOPHOSPHATE"), 9.240 +('t', True, 
"2BT", "2'-O-BUTYL-THYMIDINE"), 9.241 +('t', True, "2DT", "3'-DEOXYTHYMIDINE-5'-MONOPHOSPHATE"), 9.242 +('t', True, "2GT", "2'-O-PROPARGYL THYMIDINE-5'-MONOPHOSPHATE"), 9.243 +('t', True, "2NT", "2'-O-[2-[HYDROXY(METHYLENEAMINO)OXY]ETHYL THYMIDINE-5'-MONOPHOSPHATE"), 9.244 +('t', True, "2OT", "2'-O-[2-(N,N-DIMETHYLAMINOOXY)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"), 9.245 +('t', True, "2ST", "5-METHYL-2'-SE-METHYL-2'-SELENOURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 9.246 +('t', True, "5AT", "5'-AMINO-5'-DEOXYTHYMIDINE"), 9.247 +('t', True, "5HT", "5-HYDROXY-THYMIDINE"), 9.248 +('t', True, "5IT", "5-IODO-THYMIDINE-5'-PHOSPHATE"), 9.249 +('t', True, "5PY", "1-(2'-DEOXY-5'-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-METHYLPYRIMIDIN-2(1H)-ONE"), 9.250 +('t', True, "64T", "5-HYDROXY-THYMIDINE-5'-MONOPHOSPHATE"), 9.251 +('t', True, "6CT", "PHOSPHORIC"), 9.252 +('t', True, "6HT", "1',5'-ANHYDRO-2',3'-DIDEOXY-2'-(THYMIN-1-YL)-6'-O-PHOSPHORYL-D-ARABINO-HEXITOL"), 9.253 +('t', True, "ATD", "THYMIDINE-3'-PHOSPHATE"), 9.254 +('t', True, "ATL", "[(1S,3R,4S,7R)-7-HYDROXY-3-(THYMIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"), 9.255 +('t', True, "ATM", "3'-AZIDO-3'-DEOXYTHYMIDINE-5'-MONOPHOSPHATE"), 9.256 +('t', True, "BOE", "2'-O-[2-(BENZYLOXY)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"), 9.257 +('t', True, "CTG", "(5R,6S)-5,6-DIHYDRO-5,6-DIHYDROXYTHYMIDINE-5'-MONOPHOSPHATE"), 9.258 +('t', True, "D3T", "2',3'-DIDEOXY-THYMIDINE-5'-TRIPHOSPHATE"), 9.259 +('t', True, "D4M", "[(5R)-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)-2,5-DIHYDROFURAN-2-YL]METHYL"), 9.260 +('t', True, "DPB", "(S)-1-[2'-DEOXY-3',5'-O-(1-PHOSPHONO)BENZYLIDENE-B-D-THREO-PENTOFURANOSYL]THYMINE"), 9.261 +('t', True, "DRT", "2'-DEOXY-L-RIBO-FURANOSYL THYMINE-5'-MONOPHOSPHATE"), 9.262 +('t', True, "EIT", "((3R,4R,5R)-4-(2-(1H-IMIDAZOL-1-YL)ETHOXY)-3-HYDROXY-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)-TETRAHYDROFURAN-2-YL)METHYL"), 9.263 +('t', True, "MMT", "5'-O-(DIMETHYLAMINO)-THYMIDINE"), 9.264 +('t', True, "MTR", 
"(5-METHYL-6-OXO-1,6-DIHYDRO-PYRIDIN-3-YL)-1,2-DIDEOXY-RIBOFURANOSE-5-MONOPHOSPHATE"), 9.265 +('t', True, "NMS", "1-(O2-(2-METHYLAMINO-2-OXO-ETHYL)-O5-HYDROXYPHOSPHINYL-BETA-D-RIBOFURANOSYL)THYMINE"), 9.266 +('t', True, "NMT", "1-(O2-(METHYLCARBAMOYL)-O5-HYDROXYPHOSPHINYL-BETA-D-RIBOFURANOSYL)THYMINE"), 9.267 +('t', True, "P2T", "2'-O-PROPYL THYMIDINE-5-MONOPHOSPHATE"), 9.268 +('t', True, "PST", "THYMIDINE-5'-THIOPHOSPHATE"), 9.269 +('t', True, "S2M", "2'-O-[2-(METHOXY)ETHYL]-2-THIOTHYMIDINE-5'-MONOPHOSPHATE"), 9.270 +('t', True, "SPT", "5'-THIO-THYMIDINE PHOSPHONIC ACID"), 9.271 +('t', True, "T32", "6'-ALPHA-METHYL CARBOCYCLIC THYMIDINE 5'-MONOPHOSPHATE"), 9.272 +('t', True, "T36", "SPLIT LINKAGE THYMIDINE 5'-MONOPHOSPHATE"), 9.273 +('t', True, "T37", "3'-AMINO-2'DEOXYTHYMIDINE 5'-MONOPHOSPHATE"), 9.274 +('t', True, "T3P", "THYMIDINE-3'-PHOSPHATE"), 9.275 +('t', True, "T48", "6'-ALPHA-HYDROXY CARBOCYCLIC THYMIDINE 5'-MONOPHOSPHATE"), 9.276 +('t', True, "T49", "S4'-2'DEOXYTHYMIDINE 5'-MONOPHOSPHATE"), 9.277 +('t', True, "T4S", "1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-4-HYDROSELENO-5-METHYLPYRIMIDIN-2(1H)-ONE"), 9.278 +('t', True, "T5S", "2'-deoxy-5-(methylselanyl)uridine 5'-phosphate"), 9.279 +('t', True, "TA3", "(4S,5R)-3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-METHYL-1,3-DIAZABICYCLO[2.2.0]HEXAN-2-ONE"), 9.280 +('t', True, "TAF", "2'-DEOXY-2'-FLUORO-ARABINO-FURANOSYL THYMINE-5'-PHOSPHATE"), 9.281 +('t', True, "TCP", "5'-METHYLTHYMIDINE"), 9.282 +('t', True, "TFE", "2'-O-[2-(TRIFLUORO)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"), 9.283 +('t', True, "TFT", "(L)-ALPHA-THREOFURANOSYL-THYMINE-3'-MONOPHOSPHATE"), 9.284 +('t', True, "TLC", "2-O,3-ETHDIYL-ARABINOFURANOSYL-THYMINE-5'-MONOPHOSPHATE"), 9.285 +('t', True, "TP1", "2-(METHYLAMINO)-ETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE"), 9.286 +('t', True, "TPN", "2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE"), 9.287 +('t', True, "TTD", "CIS-SYN"), 9.288 +('t', True, "TTM", 
"N3-ETHYL-THYMIDINE-5'-MONOPHOSPHATE"), 9.289 +('t', True, "XTH", "1-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-5-METHYLPYRIMIDINE-2,4(1H,3H)-DIONE"), 9.290 +('t', True, "XTL", "[(1S,4R,6R)-6-HYDROXY-4-(THYMIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL"), 9.291 +) 9.292 + 9.293 +protein = ( 9.294 +('A', False, "ALA", "ALANINE"), 9.295 +('C', False, "CYS", "CYSTEINE"), 9.296 +('D', False, "ASP", "ASPARTIC"), 9.297 +('E', False, "GLU", "GLUTAMIC"), 9.298 +('F', False, "PHE", "PHENYLALANINE"), 9.299 +('G', False, "GLY", "GLYCINE"), 9.300 +('H', False, "HIS", "HISTIDINE"), 9.301 +('I', False, "ILE", "ISOLEUCINE"), 9.302 +('K', False, "LYS", "LYSINE"), 9.303 +('L', False, "LEU", "LEUCINE"), 9.304 +('M', False, "MET", "METHIONINE"), 9.305 +('N', False, "ASN", "ASPARAGINE"), 9.306 +('P', False, "PRO", "PROLINE"), 9.307 +('Q', False, "GLN", "GLUTAMINE"), 9.308 +('R', False, "ARG", "ARGININE"), 9.309 +('S', False, "SER", "SERINE"), 9.310 +('T', False, "THR", "THREONINE"), 9.311 +('V', False, "VAL", "VALINE"), 9.312 +('W', False, "TRP", "TRYPTOPHAN"), 9.313 +('X', False, "", "Undefined Aminoacid"), 9.314 +('Y', False, "TYR", "TYROSINE"), 9.315 +('a', True, "0CS", "3-[(S)-HYDROPEROXYSULFINYL]-L-ALANINE"), 9.316 +('a', True, "0NC", "N-METHYL-L-ALANINAMIDE"), 9.317 +('a', True, "AA3", "2-AMINOBUTYRIC"), 9.318 +('a', True, "AA4", "2-AMINO-5-HYDROXYPENTANOIC"), 9.319 +('a', True, "ABA", "ALPHA-AMINOBUTYRIC"), 9.320 +('a', True, "AHO", "N-ACETYL-N-HYDROXY-L-ORNITHINE"), 9.321 +('a', True, "AHP", "2-AMINO-HEPTANOIC"), 9.322 +('a', True, "AIB", "ALPHA-AMINOISOBUTYRIC"), 9.323 +('a', True, "ALC", "2-AMINO-3-CYCLOHEXYL-PROPIONIC"), 9.324 +('a', True, "ALM", "1-METHYL-ALANINAL"), 9.325 +('a', True, "ALN", "NAPHTHALEN-2-YL-3-ALANINE"), 9.326 +('a', True, "ALS", "2-AMINO-3-OXO-4-SULFO-BUTYRIC"), 9.327 +('a', True, "ALT", "THIOALANINE"), 9.328 +('a', True, "APH", "P-AMIDINOPHENYL-3-ALANINE"), 9.329 +('a', True, "AYA", "N-ACETYLALANINE"), 9.330 +('a', True, "B2A", "ALANINE"), 9.331 
+('a', True, "B3A", "(3S)-3-AMINOBUTANOIC"), 9.332 +('a', True, "BAL", "BETA-ALANINE"), 9.333 +('a', True, "BNN", "ACETYL-P-AMIDINOPHENYLALANINE"), 9.334 +('a', True, "CAB", "4-CARBOXY-4-AMINOBUTANAL"), 9.335 +('a', True, "CLB", "D-PARA-CHLOROPHENYL-1-ACETAMIDOBORONIC"), 9.336 +('a', True, "CLD", "D-PARA-CHLOROPHENYL-1-ACTEAMIDOBORONIC"), 9.337 +('a', True, "DAB", "2,4-DIAMINOBUTYRIC"), 9.338 +('a', True, "DAL", "D-ALANINE"), 9.339 +('a', True, "DBU", "(2E)-2-AMINOBUT-2-ENOIC"), 9.340 +('a', True, "DBZ", "3-(BENZOYLAMINO)-L-ALANINE"), 9.341 +('a', True, "DHA", "2-AMINO-ACRYLIC"), 9.342 +('a', True, "DNP", "3-AMINO-ALANINE"), 9.343 +('a', True, "DPP", "DIAMMINOPROPANOIC"), 9.344 +('a', True, "FLA", "TRIFLUOROALANINE"), 9.345 +('a', True, "HAC", "BETA-CYCLOHEXYL-ALANINE"), 9.346 +('a', True, "HMF", "2-AMINO-4-PHENYL-BUTYRIC"), 9.347 +('a', True, "HV5", "TERT-BUTYLALANINE"), 9.348 +('a', True, "IAM", "4-[(ISOPROPYLAMINO)METHYL]PHENYLALANINE"), 9.349 +('a', True, "KYN", "KYNURENINE"), 9.350 +('a', True, "LAL", "N,N-DIMETHYL-L-ALANINE"), 9.351 +('a', True, "MA", "METHYL"), 9.352 +('a', True, "MAA", "N-METHYLALANINE"), 9.353 +('a', True, "MSP", "5'-O-[(L-METHIONYL)-SULPHAMOYL]ADENOSINE"), 9.354 +('a', True, "NAL", "BETA-(2-NAPHTHYL)-ALANINE"), 9.355 +('a', True, "NAM", "NAM"), 9.356 +('a', True, "NCB", "N-CARBAMOYL-ALANINE"), 9.357 +('a', True, "ORN", "ORNITHINE"), 9.358 +('a', True, "PAU", "PANTOTHENOIC"), 9.359 +('a', True, "PRR", "3-(METHYL-PYRIDINIUM)ALANINE"), 9.360 +('a', True, "PYA", "3-(1,10-PHENANTHROL-2-YL)-L-ALANINE"), 9.361 +('a', True, "S2P", "(2S)-2-AMINO-3-(4-HYDROXY-1,2,5-THIADIAZOL-3-YL)PROPANOIC"), 9.362 +('a', True, "SEC", "2-AMINO-3-SELENINO-PROPIONIC"), 9.363 +('a', True, "SEG", "HYDROXYALANINE"), 9.364 +('a', True, "TIH", "BETA(2-THIENYL)ALANINE"), 9.365 +('a', True, "UMA", "URIDINE-5'-DIPHOSPHATE-N-ACETYLMURAMOYL-L-ALANINE"), 9.366 +('c', True, "0A8", "S-[(2-CHLOROETHYL)CARBAMOYL]-L-CYSTEINE"), 9.367 +('c', True, "143", 
"S-2,3-DIHYDRO-5-GLYCIN-2-YL-ISOXAZOL-3-YL-CYSTEINE"), 9.368 +('c', True, "2CO", "S-HYDROPEROXYCYSTEINE"), 9.369 +('c', True, "5CS", "2-AMINO-3-(CYSTEIN-S-YL)-ISOXAZOLIDIN-5-YL-ACETIC"), 9.370 +('c', True, "BBC", "3-[(4-AMINOBUTYL)SULFINYL]-2-IMINOPROPAN-1-OL"), 9.371 +('c', True, "BCS", "BENZYLCYSTEINE"), 9.372 +('c', True, "BCX", "BETA-3-CYSTEINE"), 9.373 +('c', True, "BPE", "(2S)-2-AMINO-3-[(3-AMINOPROPYL)SULFANYL]PROPAN-1-OL"), 9.374 +('c', True, "BTC", "CYSTEINE"), 9.375 +('c', True, "BUC", "S,S-BUTYLTHIOCYSTEINE"), 9.376 +('c', True, "C3Y", "S-[(1S)-1-HYDROXY-1-(HYDROXYAMINO)ETHYL]-L-CYSTEINE"), 9.377 +('c', True, "C5C", "S-CYCLOPENTYL"), 9.378 +('c', True, "C6C", "S-CYCLOHEXYL"), 9.379 +('c', True, "CAF", "S-DIMETHYLARSINOYL-CYSTEINE"), 9.380 +('c', True, "CAS", "S-(DIMETHYLARSENIC)CYSTEINE"), 9.381 +('c', True, "CAY", "CARBOXYMETHYLENECYSTEINE"), 9.382 +('c', True, "CCS", "CARBOXYMETHYLATED"), 9.383 +('c', True, "CEA", "S-HYDROXY-CYSTEINE"), 9.384 +('c', True, "CME", "S,S-(2-HYDROXYETHYL)THIOCYSTEINE"), 9.385 +('c', True, "CMH", "S-(METHYLMERCURY)-L-CYSTEINE"), 9.386 +('c', True, "CML", "(2S)-2-{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}BUTANEDIOIC"), 9.387 +('c', True, "CMT", "O-METHYLCYSTEINE"), 9.388 +('c', True, "CS0", "S-(2-HYDROXYETHYL)-L-CYSTEINE"), 9.389 +('c', True, "CS1", "S-(2-ANILINYL-SULFANYL)-CYSTEINE"), 9.390 +('c', True, "CS3", "S-[3-OXO-3-(2-THIENYL)PROPYL]-L-CYSTEINE"), 9.391 +('c', True, "CS4", "S-[3-(3,4-DICHLOROPHENYL)-3-OXOPROPYL]-L-CYSTEINE"), 9.392 +('c', True, "CSA", "S-ACETONYLCYSTEINE"), 9.393 +('c', True, "CSB", "CYS"), 9.394 +('c', True, "CSD", "3-SULFINOALANINE"), 9.395 +('c', True, "CSE", "SELENOCYSTEINE"), 9.396 +('c', True, "CSO", "S-HYDROXYCYSTEINE"), 9.397 +('c', True, "CSP", "S-PHOSPHOCYSTEINE"), 9.398 +('c', True, "CSR", "S-ARSONOCYSTEINE"), 9.399 +('c', True, "CSS", "S-MERCAPTOCYSTEINE"), 9.400 +('c', True, "CSU", "CYSTEINE-S-SULFONIC"), 9.401 +('c', True, "CSW", "CYSTEINE-S-DIOXIDE"), 9.402 +('c', True, "CSX", "S-OXY"), 
9.403 +('c', True, "CSZ", "S-SELANYL"), 9.404 +('c', True, "CY0", "S-{3-[(4-ANILINOQUINAZOLIN-6-YL)AMINO]-3-OXOPROPYL}-L-CYSTEINE"), 9.405 +('c', True, "CY1", "ACETAMIDOMETHYLCYSTEINE"), 9.406 +('c', True, "CY3", "2-AMINO-3-MERCAPTO-PROPIONAMIDE"), 9.407 +('c', True, "CY4", "S-BUTYRYL-CYSTEIN"), 9.408 +('c', True, "CYA", "TWO"), 9.409 +('c', True, "CYD", "2-AMINO-6-(CYSTEIN-S-YL)-5-OXO-HEXANOIC"), 9.410 +('c', True, "CYF", "5-[2-(2-AMINO-2-CARBAMOYL-ETHYLSULFANYL)-ACETYLAMINO]-2-(3,6-DIHYDROXY-9,9A-DIHYDRO-3H-XANTHEN-9-YL)-BENZOIC"), 9.411 +('c', True, "CYG", "2-AMINO-4-(AMINO-3-OXO-PROPYLSULFANYLCARBONYL)-BUTYRIC"), 9.412 +('c', True, "CYM", "S-METHYLCYSTEINE"), 9.413 +('c', True, "CYQ", "2-AMINO-3-PHOSPHONOMETHYLSULFANYL-PROPIONIC"), 9.414 +('c', True, "CYR", "N~5~-[{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}(IMINIO)METHYL]-L-ORNITHINATE"), 9.415 +('c', True, "CZ2", "S-(DIHYDROXYARSINO)CYSTEINE"), 9.416 +('c', True, "CZZ", "THIARSAHYDROXY-CYSTEINE"), 9.417 +('c', True, "DCY", "D-CYSTEINE"), 9.418 +('c', True, "DYS", "S-[5-(2-AMINOETHYL)-2,3-DIHYDROXYPHENYL]-L-CYSTEINE"), 9.419 +('c', True, "EFC", "S,S-(2-FLUOROETHYL)THIOCYSTEINE"), 9.420 +('c', True, "FOE", "2-(2-AMINO-3-OXO-PROPYLSULFANYL)-N-(4-FLUORO-PHENYL)-N-ISOPROPYL-ACETAMIDE"), 9.421 +('c', True, "GT9", "S-NONYL-CYSTEINE"), 9.422 +('c', True, "HTI", "(4S)-4-{[(2S)-2-AMINO-3-OXOPROPYL]SULFANYL}-L-HOMOSERINE"), 9.423 +('c', True, "K1R", "(2S)-2-AMINO-4-[({[(2R)-2-AMINO-2-CARBOXYETHYL]THIO}AMINO)SULFINYL]BUTANOIC"), 9.424 +('c', True, "M0H", "S-(HYDROXYMETHYL)-L-CYSTEINE"), 9.425 +('c', True, "MCS", "MALONYL"), 9.426 +('c', True, "NPH", "CYSTEINE-METHYLENE-CARBAMOYL-1,10-PHENANTHROLINE"), 9.427 +('c', True, "NYS", "S-{5-[(1R)-2-AMINO-1-HYDROXYETHYL]-2,3-DIHYDROXYPHENYL}-L-CYSTEINE"), 9.428 +('c', True, "OCS", "CYSTEINESULFONIC"), 9.429 +('c', True, "OCY", "HYDROXYETHYLCYSTEINE"), 9.430 +('c', True, "P1L", "S-PALMITOYL-L-CYSTEINE"), 9.431 +('c', True, "PBB", "S-(4-BROMOBENZYL)CYSTEINE"), 9.432 +('c', True, "PEC", 
"S,S-PENTYLTHIOCYSTEINE"), 9.433 +('c', True, "PR3", "S,S-PROPYLTHIOCYSTEINE"), 9.434 +('c', True, "PYX", "S-[S-THIOPYRIDOXAMINYL]CYSTEINE"), 9.435 +('c', True, "R1A", "3-{[(2,2,5,5-TETRAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-D-ALANINE"), 9.436 +('c', True, "R1B", "3-{[(2,2,4,5,5-PENTAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-L-ALANINE"), 9.437 +('c', True, "R1F", "3-{[(2,2,5,5-TETRAMETHYL-1-OXO-4-PHENYL-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-D-ALANINE"), 9.438 +('c', True, "R7A", "3-S-[(4-BROMO-2,2,5,5-TETRAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROL-3-YL)METHYL]SULFANYL-L-CYSTEINE"), 9.439 +('c', True, "RCY", "S-[(3S,3'R)-1'-HYDROXY-2',2',5',5'-TETRAMETHYL-2,5-DIOXO-1,3'-BIPYRROLIDIN-3-YL]-L-CYSTEINE"), 9.440 +('c', True, "SAH", "S-ADENOSYL-L-HOMOCYSTEINE"), 9.441 +('c', True, "SCH", "S-METHYL-THIO-CYSTEINE"), 9.442 +('c', True, "SCS", "3-(ETHYLDISULFANYL)-L-ALANINE"), 9.443 +('c', True, "SCY", "S-ACETYL-CYSTEINE"), 9.444 +('c', True, "SHC", "S-HEXYLCYSTEINE"), 9.445 +('c', True, "SIB", "(2S)-2-AMINO-4-({[(2S,3S,4R,5R)-3,4-DIHYDROXY-5-(6-OXO-1,6-DIHYDRO-9H-PURIN-9-YL)TETRAHYDROFURAN-2-YL]METHYL}THIO)BUTANOIC"), 9.446 +('c', True, "SMC", "S-METHYLCYSTEINE"), 9.447 +('c', True, "SNC", "S-NITROSO-CYSTEINE"), 9.448 +('c', True, "SOC", "DIOXYSELENOCYSTEINE"), 9.449 +('c', True, "SYS", "3-[(2-AMINO-2-OXOETHYL)SELANYL]-L-ALANINE"), 9.450 +('c', True, "TNB", "S-(2,3,6-TRINITROPHENYL)CYSTEINE"), 9.451 +('c', True, "YCM", "S-(2-AMINO-2-OXOETHYL)-L-CYSTEINE"), 9.452 +('d', True, "0A0", "2-METHYL-L-ASPARTIC"), 9.453 +('d', True, "0AK", "(2S)-2-AMINO-4-(2-CHLOROETHOXY)-4-OXOBUTANOIC"), 9.454 +('d', True, "3MD", "2S,3S-3-METHYLASPARTIC"), 9.455 +('d', True, "ACB", "3-METHYL-ASPARTIC"), 9.456 +('d', True, "AEI", "THREONINE-ASPARTIC"), 9.457 +('d', True, "AKL", "3-AMINO-5-CHLORO-4-OXOPENTANOIC"), 9.458 +('d', True, "AS2", "(2R)-2-AMINO-4-OXOBUTANOIC"), 9.459 +('d', True, "ASA", "ASPARTIC"), 9.460 +('d', True, "ASB", "ASPARTIC"), 9.461 
+('d', True, "ASI", "L-ISO-ASPARTATE"), 9.462 +('d', True, "ASK", "DEHYDROXYMETHYLASPARTIC"), 9.463 +('d', True, "ASL", "ASPARTIC"), 9.464 +('d', True, "ASQ", "PHOSPHOASPARTATE"), 9.465 +('d', True, "B3D", "3-AMINOPENTANEDIOIC"), 9.466 +('d', True, "BFD", "ASPARTATE"), 9.467 +('d', True, "BHD", "BETA-HYDROXYASPARTIC"), 9.468 +('d', True, "DAS", "D-ASPARTIC"), 9.469 +('d', True, "DMK", "3,3-DIMETHYL"), 9.470 +('d', True, "DOH", "BETA-HYDROXY"), 9.471 +('d', True, "DSP", "D-ASPARTIC"), 9.472 +('d', True, "IAS", "BETA-ASPARTYL"), 9.473 +('d', True, "LAA", "(3R)-3-HYDROXY-L-ALPHA-ASPARAGINE"), 9.474 +('d', True, "OHS", "O-(CARBOXYSULFANYL)-4-OXO-L-HOMOSERINE"), 9.475 +('d', True, "OXX", "OXALYL-ASPARTYL"), 9.476 +('d', True, "PAS", "2-AMINO-4-OXO-4-PHOSPHONOOXY-BUTYRIC"), 9.477 +('d', True, "PHD", "ASPARTYL"), 9.478 +('d', True, "TAV", "N-METHYL-N-{2-[(2-NAPHTHYLSULFONYL)AMINO]-5-[(2-NAPHTHYLSULFONYL)OXY]BENZOYL}-L-ASPARTIC"), 9.479 +('e', True, "5HP", "PYROGLUTAMIC"), 9.480 +('e', True, "AR4", "2-AMINO-5-(3-FLUORO-3,4-DIHYDROXY-5-HYDROXYMETHYL-TETRAHYDRO-FURAN-2-YLOXY)-5-HYDROXY-PENTANOIC"), 9.481 +('e', True, "B3E", "(3S)-3-AMINOHEXANEDIOIC"), 9.482 +('e', True, "CGA", "CARBOXYMETHYLATED"), 9.483 +('e', True, "CGU", "GAMMA-CARBOXY-GLUTAMIC"), 9.484 +('e', True, "CRU", "4-[(4Z)-1-(CARBOXYMETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-2-YL]-4-IMINOBUTANOIC"), 9.485 +('e', True, "DGL", "D-GLUTAMIC"), 9.486 +('e', True, "GAU", "(4S)-4-AMINO-5-HYDROXYPENTANOIC"), 9.487 +('e', True, "GGL", "GAMMA-GLUTAMIC"), 9.488 +('e', True, "GLQ", "4-AMINO-5-OXO-PENTANOIC"), 9.489 +('e', True, "GMA", "4-AMIDO-4-CARBAMOYL-BUTYRIC"), 9.490 +('e', True, "GSU", "O5'-(L-GLUTAMYL-SULFAMOYL)-ADENOSINE"), 9.491 +('e', True, "ILG", "GLUTAMYL"), 9.492 +('e', True, "LME", "(3R)-3-METHYL-L-GLUTAMIC"), 9.493 +('e', True, "MEG", "(2S,3R)-3-METHYL-GLUTAMIC"), 9.494 +('e', True, "NHL", "(4S)-4-(2-NAPHTHYLMETHYL)-D-GLUTAMIC"), 9.495 +('e', True, "PCA", "PYROGLUTAMIC"), 9.496 +('f', True, 
"0A9", "METHYL"), 9.497 +('f', True, "1PA", "PHENYLMETHYLACETIC"), 9.498 +('f', True, "200", "4-CHLORO-L-PHENYLALANINE"), 9.499 +('f', True, "23F", "(2Z)-2-AMINO-3-PHENYLACRYLIC"), 9.500 +('f', True, "4PH", "4-METHYL-L-PHENYLALANINE"), 9.501 +('f', True, "B1F", "PHENYLALANINE"), 9.502 +('f', True, "B2F", "PHENYLALANINE"), 9.503 +('f', True, "BIF", "(R)-2-AMINO-3-(4-PHENYLCYCLOHEXYL)PROPANOIC"), 9.504 +('f', True, "DAH", "3,4-DIHYDROXYPHENYLALANINE"), 9.505 +('f', True, "DPH", "DEAMINO-METHYL-PHENYLALANINE"), 9.506 +('f', True, "DPN", "D-PHENYLALANINE"), 9.507 +('f', True, "EHP", "3-HYDROXYPHENYLALANINE"), 9.508 +('f', True, "FCL", "3-CHLORO-L-PHENYLALANINE"), 9.509 +('f', True, "FOG", "PHENYLALANINOYL-[1-HYDROXY]-2-PROPYLENE"), 9.510 +('f', True, "FPA", "1,1"), 9.511 +('f', True, "HPC", "3-AMINO-4-PHENYL-BUTAN-2-ONE"), 9.512 +('f', True, "HPE", "HOMOPHENYLALANINE"), 9.513 +('f', True, "HPQ", "HOMOPHENYLALANINYLMETHANE"), 9.514 +('f', True, "IOY", "P-IODO-D-PHENYLALANINE"), 9.515 +('f', True, "MEA", "N-METHYLPHENYLALANINE"), 9.516 +('f', True, "NDF", "N-(CARBOXYCARBONYL)-D-PHENYLALANINE"), 9.517 +('f', True, "NFA", "PHENYLALANINE"), 9.518 +('f', True, "PBF", "PARA-(BENZOYL)-PHENYLALANINE"), 9.519 +('f', True, "PCS", "PHENYLALANYLMETHYLCHLORIDE"), 9.520 +('f', True, "PF5", "2,3,4,5,6-PENTAFLUORO-L-PHENYLALANINE"), 9.521 +('f', True, "PFF", "4-FLUORO-L-PHENYLALANINE"), 9.522 +('f', True, "PHA", "PHENYLALANINAL"), 9.523 +('f', True, "PHI", "IODO-PHENYLALANINE"), 9.524 +('f', True, "PHL", "L-PHENYLALANINOL"), 9.525 +('f', True, "PHM", "PHENYLALANYLMETHANE"), 9.526 +('f', True, "PM3", "2-AMINO-3-(4-PHOSPHONOMETHYL-PHENYL)-PROPIONIC"), 9.527 +('f', True, "PPN", "PARA-NITROPHENYLALANINE"), 9.528 +('f', True, "PSA", "3-HYDROXY-4-AMINO-5-PHENYLPENTANOIC"), 9.529 +('f', True, "SMF", "4-SULFOMETHYL-L-PHENYLALANINE"), 9.530 +('f', True, "T11", "4-[3-(TRIFLUOROMETHYL)DIAZIRIDIN-3-YL]-L-PHENYLALANINE"), 9.531 +('f', True, "TFQ", "4-(2,2,2-TRIFLUOROETHYL)-L-PHENYLALANINE"), 9.532 
+('g', True, "0AC", "(4S,5S)-5-AMINO-4-HYDROXY-6-PHENYLHEXANOIC"), 9.533 +('g', True, "2AG", "(2S)-2-AMINOPENT-4-ENOIC"), 9.534 +('g', True, "CHP", "3-CHLORO-4-HYDROXYPHENYLGLYCINE"), 9.535 +('g', True, "CR5", "(2R)-2-(AMINOMETHYL)-2,4-DIHYDROXY-5-OXO-3-(2-OXOETHYL)-2,5-DIHYDRO-1H-IMIDAZOL-3-IUM"), 9.536 +('g', True, "CSI", "AMINO-(2-IMINO-HEXAHYDRO-PYRIMIDIN-4-YL)-ACETIC"), 9.537 +('g', True, "FGL", "2-AMINOPROPANEDIOIC"), 9.538 +('g', True, "GHP", "4-HYDROXYPHENYLGLYCINE"), 9.539 +('g', True, "GL3", "THIOGLYCIN"), 9.540 +('g', True, "GLZ", "AMINO-ACETALDEHYDE"), 9.541 +('g', True, "GSC", "2-ETHYLTHIO"), 9.542 +('g', True, "IGL", "ALPHA-AMINO-2-INDANACETIC"), 9.543 +('g', True, "IPG", "N-ISOPROPYL"), 9.544 +('g', True, "LPG", "L-PROPARGYLGLYCINE"), 9.545 +('g', True, "LVG", "L-VINYLGLYCINE"), 9.546 +('g', True, "MEU", "O-METHYL-GLYCINE"), 9.547 +('g', True, "MGY", "N-METHYLGLYCINE"), 9.548 +('g', True, "MPQ", "N-METHYL-ALPHA-PHENYL-GLYCINE"), 9.549 +('g', True, "MSA", "(2-S-METHYL)"), 9.550 +('g', True, "NMC", "N-CYCLOPROPYLMETHYL"), 9.551 +('g', True, "PG9", "D-PHENYLGLYCINE"), 9.552 +('g', True, "PGY", "PHENYLGLYCINE"), 9.553 +('g', True, "SAR", "SARCOSINE"), 9.554 +('g', True, "SHP", "(4-HYDROXYMALTOSEPHENYL)GLYCINE"), 9.555 +('g', True, "TBG", "T-BUTYL"), 9.556 +('h', True, "3AH", "[HISTIDIN-1-YL-4H-[1,2,4]TRIAZOL-5-YL]-AMINE"), 9.557 +('h', True, "DDE", "{3-[4-(2-AMINO-2-CARBOXY-ETHYL)-1H-IMIDAZOL-2-YL]-1-CARBAMOYL-PROPYL}-TRIMETHYL-AMMONIUM"), 9.558 +('h', True, "DHI", "D-HISTIDINE"), 9.559 +('h', True, "HBN", "N-(2-NAPHTHYL)HISTIDINAMIDE"), 9.560 +('h', True, "HIA", "L-HISTIDINE"), 9.561 +('h', True, "HIC", "4-METHYL-HISTIDINE"), 9.562 +('h', True, "HIP", "ND1-PHOSPHONOHISTIDINE"), 9.563 +('h', True, "HIQ", "1-[1,2-DIHYDROXY-1-(HYDROXYMETHYL)ETHYL]-L-HISTIDINE"), 9.564 +('h', True, "HS8", "3-(1-SULFO-1H-IMIDAZOL-3-IUM-4-YL)-L-ALANINE"), 9.565 +('h', True, "HSO", "HISTIDINOL"), 9.566 +('h', True, "MHS", "N1-METHYLATED"), 9.567 +('h', True, "NEM", 
"NE2-METHYLATED"), 9.568 +('h', True, "NEP", "N1-PHOSPHONOHISTIDINE"), 9.569 +('h', True, "NZH", "(2S)-2-AMINO-3-[1-(1H-TETRAAZOL-5-YL)-1H-IMIDAZOL-4-YL]PROPANAL"), 9.570 +('h', True, "OHI", "3-(2-OXO-2H-IMIDAZOL-4-YL)-L-ALANINE"), 9.571 +('h', True, "PSH", "1-THIOPHOSPHONO-L-HISTIDINE"), 9.572 +('h', True, "PVH", "HISTIDINE-METHYL-ESTER"), 9.573 +('i', True, "B2I", "ISOLEUCINE"), 9.574 +('i', True, "BIU", "5-BROMO-L-ISOLEUCINE"), 9.575 +('i', True, "DIL", "D-ISOLEUCINE"), 9.576 +('i', True, "IIL", "ISO-ISOLEUCINE"), 9.577 +('i', True, "ILX", "4,5-DIHYDROXYISOLEUCINE"), 9.578 +('i', True, "IML", "N-METHYL-ISOLEUCINE"), 9.579 +('k', True, "0A2", "[(1R)-1,5-DIAMINOPENTYL][BIS(ETHANOLATO)]HYDROXYBORATE(1-)"), 9.580 +('k', True, "6CL", "6-CARBOXYLYSINE"), 9.581 +('k', True, "ALY", "N(6)-ACETYLLYSINE"), 9.582 +('k', True, "API", "2,6-DIAMINOPIMELIC"), 9.583 +('k', True, "APK", "5'-O-[(S)-{[(5S)-5-AMINO-6-OXOHEXYL]AMINO}(HYDROXY)PHOSPHORYL]ADENOSINE"), 9.584 +('k', True, "AZK", "(S)-2-AMINO-6-AZIDOHEXANOIC"), 9.585 +('k', True, "B3K", "(3S)-3,7-DIAMINOHEPTANOIC"), 9.586 +('k', True, "BLY", "LYSINE"), 9.587 +('k', True, "C1X", "(Z)-N~6~-[(4R,5S)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)DIHYDRO-2H-THIOPYRAN-3(4H)-YLIDENE]-L-LYSINE"), 9.588 +('k', True, "CCL", "N~6~-[(CYCLOPENTYLOXY)CARBONYL]-D-LYSINE"), 9.589 +('k', True, "CLG", "2-AMINO-6-[2-(2-AMINOOXY-ACETYLAMINO)-ACETYLAMINO]-HEXANOIC"), 9.590 +('k', True, "CLH", "2-AMINO-6-[2-(2-OXO-ACETYLAMINO)-ACETYLAMINO]-HEXANOIC"), 9.591 +('k', True, "DLS", "DI-ACETYL-LYSINE"), 9.592 +('k', True, "DLY", "D-LYSINE"), 9.593 +('k', True, "DM0", "N~2~,N~2~,N~6~,N~6~-TETRAMETHYL-L-LYSINE"), 9.594 +('k', True, "DNL", "6-AMINO-HEXANAL"), 9.595 +('k', True, "DNS", "N~6~-{[5-(DIMETHYLAMINO)-1-NAPHTHYL]SULFONYL}-L-LYSINE"), 9.596 +('k', True, "FZN", 
"(2S)-2-amino-6-{[(1Z)-1-{[(2R,3R,4S,5R)-5-({[(R)-{[(R)-{[(2R,3S,4R,5R)-5-(6-amino-9H-purin-9-yl)-3,4-dihydroxytetrahydrofuran-2-yl]methoxy}(hydroxy)phosphoryl]oxy}(hydroxy)phosphoryl]oxy}methyl)-3,4-dihydroxytetrahydrofuran-2-yl]sulfanyl}ethylidene]amino}hexanoic acid"), 9.597 +('k', True, "GPL", "LYSINE GUANOSINE-5'-MONOPHOSPHATE"), 9.598 +('k', True, "I58", "4R-FLUORO-N6-ETHANIMIDOYL-L-LYSINE"), 9.599 +('k', True, "IEL", "N~6~-[(1Z)-ETHANIMIDOYL]-L-LYSINE"), 9.600 +('k', True, "IT1", "(E)-N~6~-({3-HYDROXY-2-METHYL-5-[(PHOSPHONOOXY)METHYL]PYRIDIN-4-YL}METHYLIDENE)-L-LYSINE"), 9.601 +('k', True, "KCX", "LYSINE"), 9.602 +('k', True, "KGC", "N~6~-[(2R)-2-CARBOXY-5-OXOTETRAHYDROFURAN-2-YL]-L-LYSINE"), 9.603 +('k', True, "KPI", "(2S)-2-AMINO-6-[(1-HYDROXY-1-OXO-PROPAN-2-YLIDENE)AMINO]HEXANOIC"), 9.604 +('k', True, "KST", "N~6~-(5-CARBOXY-3-THIENYL)-L-LYSINE"), 9.605 +('k', True, "KYQ", "(E)-N~6~-(1-CARBOXY-2-HYDROXYETHYLIDENE)-L-LYSINE"), 9.606 +('k', True, "LA2", "N~6~-[(6R)-6,8-DISULFANYLOCTANOYL]-L-LYSINE"), 9.607 +('k', True, "LCK", "(Z)-N~6~-(2-CARBOXY-1-METHYLETHYLIDENE)-L-LYSINE"), 9.608 +('k', True, "LCX", "CARBOXYLATED"), 9.609 +('k', True, "LDH", "N~6~-ETHYL-L-LYSINE"), 9.610 +('k', True, "LLP", "2-LYSINE(3-HYDROXY-2-METHYL-5-PHOSPHONOOXYMETHYL-PYRIDIN-4-YLMETHANE)"), 9.611 +('k', True, "LLY", "NZ-(DICARBOXYMETHYL)LYSINE"), 9.612 +('k', True, "LYM", "DEOXY-METHYL-LYSINE"), 9.613 +('k', True, "LYN", "2,6-DIAMINO-HEXANOIC"), 9.614 +('k', True, "LYR", "N~6~-[(2Z,4E,6E,8E)-3,7-DIMETHYL-9-(2,6,6-TRIMETHYLCYCLOHEX-1-EN-1-YL)NONA-2,4,6,8-TETRAENYL]LYSINE"), 9.615 +('k', True, "LYX", "N''-(2-COENZYME A)-PROPANOYL-LYSINE"), 9.616 +('k', True, "LYZ", "5-HYDROXYLYSINE"), 9.617 +('k', True, "M2L", "(2R)-2-AMINO-3-(2-DIMETHYLAMINOETHYLSULFANYL)PROPANOIC"), 9.618 +('k', True, "M3L", "N-TRIMETHYLLYSINE"), 9.619 +('k', True, "MCL", "NZ-(1-CARBOXYETHYL)-LYSINE"), 9.620 +('k', True, "ML3", "2-{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}-N,N,N-TRIMETHYLETHANAMINIUM"), 9.621 +('k', 
True, "MLY", "N-DIMETHYL-LYSINE"), 9.622 +('k', True, "MLZ", "N-METHYL-LYSINE"), 9.623 +('k', True, "SHR", "N-(5-AMINO-5-CARBOXYPENTYL)GLUTAMIC"), 9.624 +('k', True, "SLZ", "L-THIALYSINE"), 9.625 +('k', True, "TRG", "L-(N,N)"), 9.626 +('k', True, "VB1", "N^6^-[(1R)-2-{[(1S)-1-CARBOXYPROPYL]AMINO}-2-OXO-1-(SULFANYLMETHYL)ETHYL]-6-OXO-L-LYSINE"), 9.627 +('k', True, "XX1", "N~6~-7H-PURIN-6-YL-L-LYSINE"), 9.628 +('l', True, "0AG", "N-(ETHOXYCARBONYL)-L-LEUCINE"), 9.629 +('l', True, "1LU", "4-METHYL-PENTANOIC"), 9.630 +('l', True, "2LU", "2-AMINO-4-METHYL-PENTANYL"), 9.631 +('l', True, "2ML", "2-METHYLLEUCINE"), 9.632 +('l', True, "BLE", "LEUCINE"), 9.633 +('l', True, "BTA", "4-DEMETHYL-LEUCINE"), 9.634 +('l', True, "BUG", "TERT-LEUCYL"), 9.635 +('l', True, "CLE", "LEUCINE"), 9.636 +('l', True, "DLE", "D-LEUCINE"), 9.637 +('l', True, "DNE", "D-NORLEUCINE"), 9.638 +('l', True, "DNG", "N-FORMYL-D-NORLEUCINE"), 9.639 +('l', True, "DNM", "N-METHYL-D-NORLEUCINE"), 9.640 +('l', True, "DON", "6-DIAZENYL-5-OXO-L-NORLEUCINE"), 9.641 +('l', True, "EXY", "6-[(2R)-OXIRAN-2-YL]-L-NORLEUCINE"), 9.642 +('l', True, "FLE", "FUROYL-LEUCINE"), 9.643 +('l', True, "HLU", "BETA-HYDROXYLEUCINE"), 9.644 +('l', True, "LED", "(4R)-5-OXO-L-LEUCINE"), 9.645 +('l', True, "LEF", "(4S)-5-FLUORO-L-LEUCINE"), 9.646 +('l', True, "LEH", "N-[12-(1H-IMIDAZOL-1-YL)DODECANOYL]-L-LEUCINE"), 9.647 +('l', True, "MHL", "N-METHYL-4-HYDROXY-LEUCINE"), 9.648 +('l', True, "MLE", "N-METHYLLEUCINE"), 9.649 +('l', True, "MLL", "METHYL"), 9.650 +('l', True, "MNL", "4,N-DIMETHYLNORLEUCINE"), 9.651 +('l', True, "NLE", "NORLEUCINE"), 9.652 +('l', True, "NLN", "NORLEUCINE"), 9.653 +('l', True, "NLO", "O-METHYL-L-NORLEUCINE"), 9.654 +('l', True, "NLP", "(1-AMINO-PENTYL)-PHOSPHONIC"), 9.655 +('l', True, "PLE", "LEUCINE"), 9.656 +('l', True, "PPH", "PHENYLALANINE"), 9.657 +('m', True, "2FM", "S-(DIFLUOROMETHYL)HOMOCYSTEINE"), 9.658 +('m', True, "CXM", "N-CARBOXYMETHIONINE"), 9.659 +('m', True, "ESC", "2-AMINO-4-ETHYL"), 9.660 
+('m', True, "FME", "N-FORMYLMETHIONINE"), 9.661 +('m', True, "KOR", "L-HOMOCYSTEINE-S-N-S-L-CYSTEINE"), 9.662 +('m', True, "MED", "D-METHIONINE"), 9.663 +('m', True, "MHO", "S-OXYMETHIONINE"), 9.664 +('m', True, "MME", "N-METHYL"), 9.665 +('m', True, "MSE", "SELENOMETHIONINE"), 9.666 +('m', True, "MSL", "(2S)-2-AMINO-4-(METHYLSULFONIMIDOYL)BUTANOIC"), 9.667 +('m', True, "MSO", "SELENOMETHIONINE"), 9.668 +('m', True, "MT2", "[(3S)-3-AMINO-3-CARBOXYPROPYL](ETHYL)METHYLSULFONIUM"), 9.669 +('m', True, "OMT", "S-DIOXYMETHIONINE"), 9.670 +('m', True, "SME", "METHIONINE"), 9.671 +('n', True, "0A5", "N~2~-PROPANOYL-L-ASPARAGINE"), 9.672 +('n', True, "AFA", "N-[7-METHYL-OCT-2,4-DIENOYL]ASPARAGINE"), 9.673 +('n', True, "AHB", "BETA-HYDROXYASPARAGINE"), 9.674 +('n', True, "B3X", "(3S)-3,5-DIAMINO-5-OXOPENTANOIC"), 9.675 +('n', True, "DMH", "N4,N4-DIMETHYL-ASPARAGINE"), 9.676 +('n', True, "DSG", "D-ASPARAGINE"), 9.677 +('n', True, "MEN", "N-METHYL"), 9.678 +('n', True, "SNN", "L-3-AMINOSUCCINIMIDE"), 9.679 +('p', True, "0AZ", "(4R)-4-HYDROXY-L-PROLINE"), 9.680 +('p', True, "2MT", "(4R)-2,2-DIMETHYL-1,3-THIAZOLIDINE-4-CARBOXYLIC"), 9.681 +('p', True, "4FB", "(4S)-4-FLUORO-L-PROLINE"), 9.682 +('p', True, "DPL", "4-OXOPROLINE"), 9.683 +('p', True, "DPR", "D-PROLINE"), 9.684 +('p', True, "H5M", "TRANS-3-HYDROXY-5-METHYLPROLINE"), 9.685 +('p', True, "HY3", "3-HYDROXYPROLINE"), 9.686 +('p', True, "HYP", "4-HYDROXYPROLINE"), 9.687 +('p', True, "LPD", "L-PROLINAMIDE"), 9.688 +('p', True, "N7P", "1-ACETYL-D-PROLINE"), 9.689 +('p', True, "P2Y", "(2S)-PYRROLIDIN-2-YLMETHYLAMINE"), 9.690 +('p', True, "PCC", "5-OXOPROLINE"), 9.691 +('p', True, "POM", "CIS-5-METHYL-4-OXOPROLINE"), 9.692 +('p', True, "PRS", "THIOPROLINE"), 9.693 +('p', True, "SLA", "(3S,4R)-3-HYDROXY-2-[(1S)-1-HYDROXY-2-METHYLPROPYL]-4-METHYL-5-OXO-D-PROLINE"), 9.694 +('p', True, "SLR", "(3R,4R)-3-HYDROXY-2-[(1S)-1-HYDROXY-2-METHYLPROPYL]-4-METHYL-5-OXO-D-PROLINE"), 9.695 +('q', True, "DGN", "D-GLUTAMINE"), 9.696 +('q', 
True, "GHG", "GAMMA-HYDROXY-GLUTAMINE"), 9.697 +('q', True, "GLH", "N-5-CYCLOHEXYL-N-5-[(CYCLOHEXYLAMINO)CARBONYL]GLUTAMINE"), 9.698 +('q', True, "MEQ", "N5-METHYLGLUTAMINE"), 9.699 +('q', True, "MGN", "2-METHYL-GLUTAMINE"), 9.700 +('q', True, "NLQ", "N~2~-ACETYL-L-GLUTAMINE"), 9.701 +('r', True, "2MR", "N3,"), 9.702 +('r', True, "AAR", "ARGININEAMIDE"), 9.703 +('r', True, "ACL", "DEOXY-CHLOROMETHYL-ARGININE"), 9.704 +('r', True, "AGM", "5-METHYL-ARGININE"), 9.705 +('r', True, "ALG", "GUANIDINOBUTYRYL"), 9.706 +('r', True, "ARM", "DEOXY-METHYL-ARGININE"), 9.707 +('r', True, "ARO", "C-GAMMA-HYDROXY"), 9.708 +('r', True, "BOR", "(1R)-1-AMINO-4-{[(E)-AMINO(IMINO)METHYL]AMINO}BUTYLBORONIC"), 9.709 +('r', True, "CIR", "CITRULLINE"), 9.710 +('r', True, "DAR", "D-ARGININE"), 9.711 +('r', True, "DIR", "3-{[(E)-AMINO(HYDROXYIMINO)METHYL]AMINO}ALANINE"), 9.712 +('r', True, "HAR", "N-OMEGA-HYDROXY-L-ARGININE"), 9.713 +('r', True, "HMR", "BETA-HOMOARGININE"), 9.714 +('r', True, "HRG", "L-HOMOARGININE"), 9.715 +('r', True, "MAI", "DEOXO-METHYLARGININE"), 9.716 +('r', True, "MGG", "2-(2-CARBOXY-ACETYLAMINO)-5-GUANIDINO-PENTANOIC"), 9.717 +('r', True, "NMM", "(R)-2-AMINO-5-(3-METHYLGUANIDINO)BUTANOIC"), 9.718 +('r', True, "NNH", "NOR-N-OMEGA-HYDROXY-L-ARGININE"), 9.719 +('r', True, "OPR", "C-(3-OXOPROPYL)ARGININE"), 9.720 +('r', True, "ORQ", "N~5~-ACETYL-L-ORNITHINE"), 9.721 +('s', True, "0AH", "O-(BROMOACETYL)-L-SERINE"), 9.722 +('s', True, "AZS", "O-DIAZOACETYL-L-SERINE"), 9.723 +('s', True, "B3S", "(3R)-3-AMINO-4-HYDROXYBUTANOIC"), 9.724 +('s', True, "BG1", "O-[(2S)-2-{METHYL[(METHYLAMINO)SULFONYL]AMINO}PENTANOYL]-L-SERINE"), 9.725 +('s', True, "BSE", "BETA-3-SERINE"), 9.726 +('s', True, "CWR", "(4-METHYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL)ACETIC"), 9.727 +('s', True, "DBS", "2-(2,3-DIHYDROXY-BENZOYLAMINO)-3-HYDROXY-PROPIONIC"), 9.728 +('s', True, "DSE", "N-METHYL-D-SERINE"), 9.729 +('s', True, "DSN", "D-SERINE"), 9.730 +('s', True, "FGP", 
"2-AMINO-3-HYDROXY-3-PHOSPHONOOXY-PROPIONIC"), 9.731 +('s', True, "HSE", "L-HOMOSERINE"), 9.732 +('s', True, "HSL", "HOMOSERINE"), 9.733 +('s', True, "LPS", "O-{HYDROXY[((2R)-2-HYDROXY-3-{[(1S)-1-HYDROXYPENTADECYL]OXY}PROPYL)OXY]PHOSPHORYL}-L-SERINE"), 9.734 +('s', True, "MC1", "METHICILLIN"), 9.735 +('s', True, "MIS", "MONOISOPROPYLPHOSPHORYLSERINE"), 9.736 +('s', True, "N10", "O-[(HEXYLAMINO)CARBONYL]-L-SERINE"), 9.737 +('s', True, "NC1", "NITROCEFIN"), 9.738 +('s', True, "OAS", "O-ACETYLSERINE"), 9.739 +('s', True, "OLZ", "O-(2-AMINOETHYL)-L-SERINE"), 9.740 +('s', True, "OSE", "O-SULFO-L-SERINE"), 9.741 +('s', True, "PG1", "PENICILLIN"), 9.742 +('s', True, "S1H", "1-HEXADECANOSULFONYL-O-L-SERINE"), 9.743 +('s', True, "SAC", "N-ACETYL-SERINE"), 9.744 +('s', True, "SBD", "D-NAPHTHYL-1-ACETAMIDO"), 9.745 +('s', True, "SBL", "L-NAPHTHYL-1-ACETAMIDO"), 9.746 +('s', True, "SDP", "2-AMINO-3-(DIETHOXY-PHOSPHORYLOXY)-PROPIONIC"), 9.747 +('s', True, "SEB", "O-BENZYLSULFONYL-SERINE"), 9.748 +('s', True, "SEL", "2-AMINO-1,3-PROPANEDIOL"), 9.749 +('s', True, "SEP", "PHOSPHOSERINE"), 9.750 +('s', True, "SET", "AMINOSERINE"), 9.751 +('s', True, "SGB", "O-[(S)-METHYL(1-METHYLETHOXY)PHOSPHORYL]-L-SERINE"), 9.752 +('s', True, "SOY", "OXACILLIN-ACYLATED"), 9.753 +('s', True, "SUN", "O-[(R)-(DIMETHYLAMINO)(ETHOXY)PHOSPHORYL]-L-SERINE"), 9.754 +('s', True, "SVA", "SERINE"), 9.755 +('s', True, "TNR", "O-(2-ACETAMIDO-2-DEOXY-ALPHA-D-GALACTOPYRANOSYL)-L-SERINE"), 9.756 +('t', True, "ALO", "ALLO-THREONINE"), 9.757 +('t', True, "BMT", "4-METHYL-4-[(E)-2-BUTENYL]-4,N-METHYL-THREONINE"), 9.758 +('t', True, "CTH", "4-CHLOROTHREONINE"), 9.759 +('t', True, "D11", "D-PHOSHPHOTHREONINE"), 9.760 +('t', True, "DTH", "D-THREONINE"), 9.761 +('t', True, "IYT", "N-ALPHA-ACETYL-3,5-DIIODOTYROSYL-D-THREONINE"), 9.762 +('t', True, "OLT", "O-METHYL-L-THREONINE"), 9.763 +('t', True, "TBM", "4-METHYL-4-[(E)-2-BUTENYL]-4,N-DIMETHYL-THREONINE"), 9.764 +('t', True, "THC", "N-METHYLCARBONYLTHREONINE"), 9.765 
+('t', True, "TMB", "N-METHYL-4-[(E)-2-BUTENYL]-4,N-DIMETHYL-THREONINE"), 9.766 +('t', True, "TMD", "(6,7-DIHYDRO)4-[(E)-BUTENYL]-4,N-DIMETHYL-THREONINE"), 9.767 +('t', True, "TPO", "PHOSPHOTHREONINE"), 9.768 +('v', True, "0AA", "METHYL"), 9.769 +('v', True, "0AB", "(3S,4S)-3-AMINO-4-METHYL-3,4-DIHYDRO-2H-PYRAN-2-ONE"), 9.770 +('v', True, "2VA", "2'-(L-VALYL)AMINO-2'-DEOXYADENOSINE"), 9.771 +('v', True, "B2V", "VALINE"), 9.772 +('v', True, "DHN", "5-HYDROXY"), 9.773 +('v', True, "DIV", "D-ISOVALINE"), 9.774 +('v', True, "DVA", "D-VALINE"), 9.775 +('v', True, "MNV", "N-METHYL-C-AMINO"), 9.776 +('v', True, "MVA", "N-METHYLVALINE"), 9.777 +('v', True, "NVA", "NORVALINE"), 9.778 +('v', True, "VAD", "DEAMINOHYDROXYVALINE"), 9.779 +('v', True, "VAF", "METHYLVALINE"), 9.780 +('w', True, "0AF", "7-HYDROXY-L-TRYPTOPHAN"), 9.781 +('w', True, "1TQ", "6-(FORMYLAMINO)-7-HYDROXY-L-TRYPTOPHAN"), 9.782 +('w', True, "4DP", "3-[5-(DIMETHYLAMINO)-1,3-DIOXO-1,3-DIHYDRO-2H-ISOINDOL-2-YL]-L-ALANINE"), 9.783 +('w', True, "4FW", "4-FLUOROTRYPTOPHANE"), 9.784 +('w', True, "4HT", "4-HYDROXYTRYPTOPHAN"), 9.785 +('w', True, "6CW", "6-CHLORO-L-TRYPTOPHAN"), 9.786 +('w', True, "BTR", "6-BROMO-TRYPTOPHAN"), 9.787 +('w', True, "DTR", "D-TRYPTOPHAN"), 9.788 +('w', True, "FT6", "6-FLUORO-L-TRYPTOPHAN"), 9.789 +('w', True, "FTR", "FLUOROTRYPTOPHANE"), 9.790 +('w', True, "HRP", "5-HYDROXY-L-TRYPTOPHAN"), 9.791 +('w', True, "HTR", "BETA-HYDROXYTRYPTOPHANE"), 9.792 +('w', True, "LTR", "L-TRYPTOPHAN"), 9.793 +('w', True, "PAT", "ALPHA-PHOSPHONO-TRYPTOPHAN"), 9.794 +('w', True, "TOX", "2-AMINO-3-(1-HYDROPEROXY-1H-INDOL-3-YL)PROPAN-1-OL"), 9.795 +('w', True, "TPL", "TRYPTOPHANOL"), 9.796 +('w', True, "TQQ", "(S)-2-AMINO-3-(6,7-DIHYDRO-6-IMINO-7-OXO-1H-INDOL-3-YL)PROPANOIC"), 9.797 +('w', True, "TRF", "N1-FORMYL-TRYPTOPHAN"), 9.798 +('w', True, "TRN", "NZ2-TRYPTOPHAN"), 9.799 +('w', True, "TRO", "2-HYDROXY-TRYPTOPHAN"), 9.800 +('w', True, "TRQ", "2-AMINO-3-(6,7-DIOXO-6,7-DIHYDRO-1H-INDOL-3-YL)-PROPIONIC"), 
9.801 +('w', True, "TRW", "TRW3-(2-AMINO-3-HYDROXY-PROPYL)-6-(N'-CYCLOHEXYL-HYDRAZINO)OCTAHYDRO-INDOL-7-OL"), 9.802 +('w', True, "TRX", "6-HYDROXYTRYPTOPHAN"), 9.803 +('w', True, "TTQ", "6-AMINO-7-HYDROXY-L-TRYPTOPHAN"), 9.804 +('x', True, "01W", "(2S)-2-AMMONIO-4-[(2,4-DINITROPHENYL)AMINO]BUTANOATE"), 9.805 +('x', True, "0AY", "DIETHYL"), 9.806 +('x', True, "0G6", "D-PHENYLALANYL-N-[(1S)-4-{[AMINO(IMINIO)METHYL]AMINO}-1-(CHLOROACETYL)BUTYL]-L-PROLINAMIDE"), 9.807 +('x', True, "0Z6", "D-PHENYLALANYL-N-[(1S)-4-{[AMINO(IMINIO)METHYL]AMINO}-1-(CHLOROACETYL)BUTYL]-L-PHENYLALANINAMIDE"), 9.808 +('x', True, "0ZC", "(3R)-2-[N-(FURAN-2-YLCARBONYL)-L-LEUCYL]-2,3,4,9-TETRAHYDRO-1H-BETA-CARBOLINE-3-CARBOXYLIC"), 9.809 +('x', True, "0ZE", "AMINO{[(4R)-4-({[(3R,6S,8AS)-6-AMINO-6-BENZYL-5-OXOOCTAHYDROINDOLIZIN-3-YL]CARBONYL}AMINO)-5-(1,3-BENZOTHIAZOL-2-YL)-5-OXOPENTYL]AMINO}METHANIMINIUM"), 9.810 +('x', True, "0ZJ", "N-(SULFANYLACETYL)-D-PHENYLALANYL-N-[(1S)-4-CARBAMIMIDAMIDO-1-(CHLOROACETYL)BUTYL]-L-PROLINAMIDE"), 9.811 +('x', True, "0ZM", "(2R)-2-{[(2R)-2-{[(S)-HYDROXY{[(2R,3S,4S,5R,6R)-3,4,5-TRIHYDROXY-6-METHYLTETRAHYDRO-2H-PYRAN-2-YL]OXY}PHOSPHORYL]AMINO}-4-METHYLPENTANOYL]AMINO}-3-(1H-INDOL-3-YL)PROPANOIC"), 9.812 +('x', True, "175", "3,5-DIHYDRO-5-METHYLIDENE-4H-IMIDAZOL-4-ON"), 9.813 +('x', True, "193", "(2S)-4-(BETA-ALANYLAMINO)-2-AMINOBUTANOIC"), 9.814 +('x', True, "1PI", "3-(1-CARBAMIMIDOYL-PIPERIDIN-3-YL)-L-ALANINE"), 9.815 +('x', True, "1ZX", "D-PHENYLALANYL-N-[(1S)-1-ACETYL-4-{[AMINO(IMINIO)METHYL]AMINO}BUTYL]-L-PROLINAMIDE"), 9.816 +('x', True, "23S", "(S)-2-AMINO-3-(6H-SELENOLO[2,3-B]-PYRROL-4-YL)-PROPIONIC"), 9.817 +('x', True, "2AD", "2'-AMINO-2'-DEOXYADENOSINE"), 9.818 +('x', True, "2AO", "(2S)-2-AMINOHEXAN-1-OL"), 9.819 +('x', True, "2AS", "(2S,3S)-3-AMINO-2-METHYL-4-OXOBUTANOIC"), 9.820 +('x', True, "2DO", "(2S)-2-AMINOHEXANE-1,1-DIOL"), 9.821 +('x', True, "2PI", "2-AMINO-PENTANOIC"), 9.822 +('x', True, "2SI", "2-O-SULFO-A-L-IDURONIC"), 9.823 +('x', True, 
"32S", "(S)-2-AMINO-3-(4H-SELENOLO[3,2-B]-PYRROL-6-YL)-PROPIONIC"), 9.824 +('x', True, "32T", "(S)-2-AMINO-3-(4H-THIENO[3,2-B]-PYRROL-6-YL)-PROPIONIC"), 9.825 +('x', True, "3AR", "N-OMEGA-PROPYL-L-ARGININE"), 9.826 +('x', True, "3MM", "(1R)-1-CARBOXY-N,N,N-TRIMETHYL-3-(METHYLSULFANYL)PROPAN-1-AMINIUM"), 9.827 +('x', True, "3TY", "3-[(3E)-3-(BENZYLHYDRAZONO)-4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIEN-1-YL]-L-ALANINE"), 9.828 +('x', True, "4F3", "[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(4-FLUORO-1H-INDOL-3-YLMETHYL)-5-HYDROXY-IMIDAZOL-1-YL]-ACETIC"), 9.829 +('x', True, "4IN", "4-AMINO-L-TRYPTOPHAN"), 9.830 +('x', True, "4MM", "(1S)-1-CARBOXY-N,N,N-TRIMETHYL-3-(METHYLSULFANYL)PROPAN-1-AMINIUM"), 9.831 +('x', True, "5ZA", "(5Z)-2-[(1S,2R)-1-AMINO-2-HYDROXYPROPYL]-5-[(4-AMINO-1H-INDOL-3-YL)METHYLENE]-3-(2-HYDROXYETHYL)-3,5-DIHYDRO-4H-IMIDAZOL-4-ONE"), 9.832 +('x', True, "A66", "2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-ADENINE"), 9.833 +('x', True, "AB7", "ALPHA-AMINOBUTYRIC"), 9.834 +('x', True, "ADD", "2,6,8-TRIMETHYL-3-AMINO-9-BENZYL-9-METHOXYNONANOIC"), 9.835 +('x', True, "AEA", "(2-AMINO-2-CARBAMOYL-ETHYLSULFANYL)-ACETIC"), 9.836 +('x', True, "AGT", "S-{(S)-AMINO[(4-AMINOBUTYL)AMINO]METHYL}-L-CYSTEINE"), 9.837 +('x', True, "AHH", "AMINO-HYDROXYHEPTANOIC"), 9.838 +('x', True, "AHS", "(3-AMINO-4-CYCLOHEXYL-2-HYDROXY-BUTYL)-ISOBUTYL-CARBAMIC"), 9.839 +('x', True, "AHT", "4-(2,5-DIAMINO-5-HYDROXY-PENTYL)-PHENOL"), 9.840 +('x', True, "ALQ", "2-METHYL-PROPIONIC"), 9.841 +('x', True, "ANI", "4-TRIFLUOROMETHYLANILINE"), 9.842 +('x', True, "APE", "(1-AMINO-2-PHENYL-ETHYL)-CARBAMIC"), 9.843 +('x', True, "APM", "M-AMIDINOPHENYL-3-ALANINE"), 9.844 +('x', True, "APO", "D-2-AMINO-3-PHOSPHONO-PROPIONIC"), 9.845 +('x', True, "APP", "1-ACETYL-2-CARBOXYPIPERIDINE"), 9.846 +('x', True, "ARV", "5-N-ALLYL-ARGININE"), 9.847 +('x', True, "AS9", "N-[HYDROXY(METHYL)PHOSPHORYL]-L-ASPARTIC"), 9.848 +('x', True, "ASM", "2-AMINO-4-OXO-4(1H-PYRROL-1-YL)BUTANOIC"), 9.849 +('x', True, "ASX", "ASP/ASN"), 9.850 +('x', 
True, "AVN", "(2S)-AMINO[(5S)-3-CHLORO-4,5-DIHYDROISOXAZOL-5-YL]ACETIC"), 9.851 +('x', True, "AYG", "[(4E)-2-[(1S)-1-AMINOETHYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.852 +('x', True, "B3L", "(3S)-3-AMINO-5-METHYLHEXANOIC"), 9.853 +('x', True, "B3Q", "(3S)-3,6-DIAMINO-6-OXOHEXANOIC"), 9.854 +('x', True, "BCC", "6-AMINO-6-BENZYL-5-OXO-OCTAHYDRO-INDOLIZINE-3-CARBALDEHYDE"), 9.855 +('x', True, "BE2", "2-AMINOBENZOIC"), 9.856 +('x', True, "BIL", "(3R,4S)-3-AMINO-4-METHYLHEXANOIC"), 9.857 +('x', True, "BJH", "1(R)-1-ACETAMIDO-2-(3-CARBOXY-2-HYDROXYPHENYL)ETHYL"), 9.858 +('x', True, "BNO", "NORLEUCINE"), 9.859 +('x', True, "C12", "2-(1-AMINO-2-HYDROXYPROPYL)-4-(4-HYDROXYBENZYL)-1-(2-OXOETHYL)-1H-IMIDAZOL-5-OLATE"), 9.860 +('x', True, "C66", "2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-CYTOSINE"), 9.861 +('x', True, "C99", "{(2R)-2-[(1S,2R)-1-AMINO-2-HYDROXYPROPYL]-2-HYDROXY-4,5-DIOXOIMIDAZOLIDIN-1-YL}ACETIC"), 9.862 +('x', True, "CAL", "5-AMINO-6-CYCLOHEXYL-4-HYDROXY-2-ISOBUTYL-HEXANOIC"), 9.863 +('x', True, "CAV", "5-AMINO-6-CYCLOHEXYL-3,4-DIHYDROXY-2-ISOPROPYL-HEXANOIC"), 9.864 +('x', True, "CCY", "2-(1-AMINO-2-MERCAPTO-ETHYL)-5-(4-HYDROXY-BENZYL)-3-(ETHANOYL)-3,5-DIHYDRO-IMIDAZOL-4-ONE"), 9.865 +('x', True, "CDE", "1,2-DIMETHYL-PROPYLAMINE"), 9.866 +('x', True, "CDV", "3-METHYL-2-UREIDO-BUTYRIC"), 9.867 +('x', True, "CFY", "[(2S)-2-{(2R)-2-[(1S)-1-AMINO-2-PHENYLETHYL]-2-HYDROXY-2,5-DIHYDRO-1,3-THIAZOL-4-YL}-4-(4-HYDROXYBENZYL)-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.868 +('x', True, "CH6", "{(4Z)-2-[(1S)-1-AMINO-3-(METHYLSULFANYL)PROPYL]-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"), 9.869 +('x', True, "CH7", "[(4Z)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-2-(3,4,5,6-TETRAHYDROPYRIDIN-2-YL)-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.870 +('x', True, "CHF", "CYCLOHEXYLFLUOROSTATONE"), 9.871 +('x', True, "CHG", "CYCLOHEXYL-GLYCINE"), 9.872 +('x', True, "CHS", "4-AMINO-5-CYCLOHEXYL-3-HYDROXY-PENTANOIC"), 9.873 +('x', 
True, "CLV", "{(2S)-2-[(1S)-1-AMINOETHYL]-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"), 9.874 +('x', True, "CPC", "2-METHYL-1-METHYLAMINO-CYCLOPROPANE"), 9.875 +('x', True, "CPI", "6-CARBOXYPIPERIDINE"), 9.876 +('x', True, "CPV", "5-AMINO-6-CYCLOHEXYL-4-HYDROXY-2-ISOPROPYL-HEXANOIC"), 9.877 +('x', True, "CQR", "[(4Z)-2-(AMINOMETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.878 +('x', True, "CR0", "[2-(1-AMINO-2-HYDROXYPROPYL)-2-HYDROXY-4-ISOBUTYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETALDEHYDE"), 9.879 +('x', True, "CR2", "{(4Z)-2-(AMINOMETHYL)-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"), 9.880 +('x', True, "CR7", "[(4Z)-2-[(1S)-1,5-DIAMINOPENTYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.881 +('x', True, "CR8", "2-[1-AMINO-2-(1H-IMIDAZOL-5-YL)ETHYL]-1-(CARBOXYMETHYL)-4-[(4-OXOCYCLOHEXA-2,5-DIEN-1-YLIDENE)METHYL]-1H-IMIDAZOL-5-OLATE"), 9.882 +('x', True, "CRF", "[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(1H-INDOL-3-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETALDEHYDE"), 9.883 +('x', True, "CRG", "[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(3H-IMIDAZOL-4-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETIC"), 9.884 +('x', True, "CRK", "4-{(Z)-[2-[3-(METHYLSULFANYL)PROPANOYL]-5-OXO-1-(2-OXOETHYL)-1,5-DIHYDRO-4H-IMIDAZOL-4-YLIDENE]METHYL}BENZENOLATE"), 9.885 +('x', True, "CRO", "{2-[(1R,2R)-1-AMINO-2-HYDROXYPROPYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"), 9.886 +('x', True, "CRQ", "[2-(3-CARBAMOYL-1-IMINO-PROPYL)-4-(4-HYDROXY-BENZYLIDENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETIC"), 9.887 +('x', True, "CRW", "[2-(1-AMINOETHYL)-4-METHYLENE-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.888 +('x', True, "CRX", "[2-(1-AMINOETHYL)-2-HYDROXY-4-METHYLENE-5-OXOIMIDAZOLIDIN-1-YL]ACETIC"), 9.889 +('x', True, "CSH", "[2-(2-HYDROXY-1-METHYL-ETHYL)-4-(1H-IMIDAZOL-4-YLMETHYL)-5-OXO-IMIDAZOLIDIN-1-YL]-ACETIC"), 9.890 +('x', True, "CSY", 
"[4-(4-HYDROXY-BENZYL)-2-(2-HYDROXY-1-METHYL-ETHYL)-5-OXO-IMIDAZOLIDIN-1-YL]-ACETIC"), 9.891 +('x', True, "CUC", "CYCLOHEXYLGLYCYL"), 9.892 +('x', True, "CYJ", "(Z)-N~6~-[(4R,5S)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)PIPERIDIN-3-YLIDENE]-L-LYSINE"), 9.893 +('x', True, "D4P", "(2S)-AMINO(4-HYDROXYPHENYL)ACETIC"), 9.894 +('x', True, "DA2", "NG,NG-DIMETHYL-L-ARGININE"), 9.895 +('x', True, "DAM", "N-METHYL-ALPHA-BETA-DEHYDROALANINE"), 9.896 +('x', True, "DCI", "2-METHYL-BUTYLAMINE"), 9.897 +('x', True, "DCL", "2-AMINO-4-METHYL-PENTAN-1-OL"), 9.898 +('x', True, "DFI", "2,2-DIFLUOROSTATINE"), 9.899 +('x', True, "DFO", "2,2-DIFLUORO-3-HYDROSTATINE"), 9.900 +('x', True, "DHL", "2-AMINO-ETHANETHIOL"), 9.901 +('x', True, "DHP", "3-DECYL-2,5-DIOXO-4-HYDROXY-3-PYRROLINE"), 9.902 +('x', True, "DMT", "3-HYDROXY-4,4-DIMETHYL-2-(METHYLAMINO)-6-OCTENOIC"), 9.903 +('x', True, "DOA", "12-AMINO-DODECANOIC"), 9.904 +('x', True, "DYG", "(3S)-3-AMINO-3-[(4Z)-1-(CARBOXYMETHYL)-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-IMIDAZOL-2-YL]PROPANOIC"), 9.905 +('x', True, "EOV", "(3S,6S,9S,12R,15S,18S,21S,24S,30S,33S)-30-ethyl-33-[(1R,2R,4E)-1-hydroxy-2-methylhex-4-en-1-yl]-1,4,7,10,12,15,19,25,28-nonamethyl-3,21-bis(1-methylethyl)-6,9,18,24-tetrakis(2-methylpropyl)-1,4,7,10,13,16,19,22,25,28,31-undecaazacyclotritriacontane-2,5,8,11,14,17,20,23,26,29,32-undecone"), 9.906 +('x', True, "EYS", "S-SELANYLCYSTEINAL"), 9.907 +('x', True, "FHL", "(E)-N~6~-[3-CARBOXY-1-(HYDROXYMETHYL)PROPYLIDENE]-L-LYSINE"), 9.908 +('x', True, "FRD", "1-PHENYL-2-AMINOPROPANE"), 9.909 +('x', True, "GCM", "GLYCYLMETHYLENE"), 9.910 +('x', True, "GLM", "1-AMINO-PROPAN-2-ONE"), 9.911 +('x', True, "GLX", "GLU/GLN"), 9.912 +('x', True, "GND", "2-AMINO-5-GUANIDINO-PENTANOIC"), 9.913 +('x', True, "GVL", "O-[(R)-{[(3R)-4-AMINO-3-HYDROXY-2,2-DIMETHYL-4-OXOBUTYL]OXY}(HYDROXY)PHOSPHORYL]-L-SERINE"), 9.914 +('x', True, "GYC", "[(4Z)-2-[(1R)-1-AMINO-2-MERCAPTOETHYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.915 
+('x', True, "GYS", "[(4Z)-2-(1-AMINO-2-HYDROXYETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.916 +('x', True, "HAQ", "5-AMINO-4-OXO-1,2,4,5,6,7-HEXAHYDRO-AZEPINO[3,2,1-HI]INDOLE-2-CARBOXYLIC"), 9.917 +('x', True, "HCS", "2-AMINO-4-MERCAPTO-BUTYRIC"), 9.918 +('x', True, "HFA", "ALPHA-HYDROXY-BETA-PHENYL-PROPIONIC"), 9.919 +('x', True, "HHK", "(2S)-2,8-DIAMINOOCTANOIC"), 9.920 +('x', True, "HPH", "PHENYLALANINDIOL"), 9.921 +('x', True, "HV8", "BENZYL-2-AMINO-PARAMETHOXY-BENZYLSTATINE"), 9.922 +('x', True, "IEY", "2-((1E)-2-(5-IMIDAZOLYL)ETHENYL)-4-(P-HYDROXYBENZYLIDENE)-5-IMIDAZOLINONE"), 9.923 +('x', True, "IIC", "4-IMIDAZOLMETHYLENE-5-IMIDAZOLONE"), 9.924 +('x', True, "ISO", "PARA-ISOPROPYLANILINE"), 9.925 +('x', True, "IVA", "ISOVALERIC"), 9.926 +('x', True, "L2A", "(2S,5S,8S,11S,15E,20S)-20-AMINO-2-(CARBOXYMETHYL)-11,20-DIMETHYL-5,8-BIS(2-METHYLPROPYL)-3,6,9,21-TETRAOXO-1,4,7,10-TETRAAZACYCLOHENICOS-15-ENE-11-CARBOXYLIC"), 9.927 +('x', True, "LET", "(Z)-N^6-{3-CARBOXY-1-[(4-CARBOXY-2-OXOBUTOXY)METHYL]PROPYLIDENE}-L-LYSINE"), 9.928 +('x', True, "LNT", "N-[(2S)-2-AMINO-1,1-DIHYDROXY-4-METHYLPENTYL]-L-THREONINE"), 9.929 +('x', True, "LOL", "LEUCINOL"), 9.930 +('x', True, "LOV", "5-AMINO-4-HYDROXY-2-ISOPROPYL-7-METHYL-OCTANOIC"), 9.931 +('x', True, "LPL", "LEU-HYDROXYETHYLENE-LEU"), 9.932 +('x', True, "LSO", "(Z)-N~6~-(3-CARBOXY-1-{[(4-CARBOXY-2-OXOBUTYL)SULFONYL]METHYL}PROPYLIDENE)-L-LYSINE"), 9.933 +('x', True, "LTA", "4,8-DIAMINO-3-HYDROXY-OCTANOIC"), 9.934 +('x', True, "MCG", "(S)-(ALPHA)-METHYL-4-CARBOXYPHENYLGLYCINE"), 9.935 +('x', True, "MDH", "N-METHYLDEHYDROBUTYRINE"), 9.936 +('x', True, "MDO", "{2-[(1S)-1-AMINOETHYL]-5-HYDROXY-4-METHYL-1H-IMIDAZOL-1-YL}ACETIC"), 9.937 +('x', True, "MF3", "2-AMINO-4-TRIFLUOROMETHYLSULFANYL-BUTYRIC"), 9.938 +('x', True, "MFC", "5-[1-(3-FLUORO-4-HYDROXY-PHENYL)-METH-(Z)-YLIDENE]-3,5-DIHYDRO-IMIDAZOL-4-ONE"), 9.939 +('x', True, "MOD", "L-METHIONYL"), 9.940 +('x', True, "MOR", 
"N-CARBONYLMORPHOLINE"), 9.941 +('x', True, "MPH", "(1-AMINO-3-METHYLSULFANYL-PROPYL)-PHOSPHONIC"), 9.942 +('x', True, "MPJ", "(1-AMINO-3-METHYLSULFANYL-PROPYL)-PHOSPHINIC"), 9.943 +('x', True, "MSU", "SUCCINIC"), 9.944 +('x', True, "N2C", "N,S-DIMETHYLCYSTEINE"), 9.945 +('x', True, "NCY", "N-METHYLCYSTEINE"), 9.946 +('x', True, "NIT", "4-NITROANILINE"), 9.947 +('x', True, "NRQ", "{(4Z)-4-(4-HYDROXYBENZYLIDENE)-2-[3-(METHYLTHIO)PROPANIMIDOYL]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"), 9.948 +('x', True, "NSK", "N-SUCCINYL"), 9.949 +('x', True, "NYC", "[(4E)-2-[(1R,2S)-1-AMINO-2-HYDROXYPROPYL]-4-(1H-INDOL-3-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.950 +('x', True, "NYG", "[(4Z)-2-[(1S)-1,3-DIAMINO-3-OXOPROPYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.951 +('x', True, "O12", "N~5~-DODECANOYL-L-ORNITHINE"), 9.952 +('x', True, "OBS", "(Z)-N^6-[(4S,5R)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)-1-HYDROXYDIHYDRO-2H-THIOPYRANIUM-3(4H)-YLIDENE]-L-LYSINE"), 9.953 +('x', True, "ODA", "9-AMINO-6,10-DIOXO-OCTAHYDRO-PYRIDAZINO[1,2-A][1,2]DIAZEPINE-1-CARBOXYLIC"), 9.954 +('x', True, "ODS", "4-METHYLPIPERAZIN-1-YL"), 9.955 +('x', True, "OIC", "OCTAHYDROINDOLE-2-CARBOXYLIC"), 9.956 +('x', True, "OLE", "2-HYDROXY-4-METHYL-PENTANOIC"), 9.957 +('x', True, "ONL", "5-OXO-L-NORLEUCINE"), 9.958 +('x', True, "OTB", "TERTIARY-BUTOXYFORMIC"), 9.959 +('x', True, "PCE", "2-(3-AMINO-4-CYCLOHEXYL-2-HYDROXY-BUTYL)-PENT-4-YNOIC"), 9.960 +('x', True, "PDD", "N-(5'-PHOSPHOPYRIDOXYL)-D-ALANINE"), 9.961 +('x', True, "PDL", "N-(5'-PHOSPHOPYRIDOXYL)-L-ALANINE"), 9.962 +('x', True, "PGL", "AMINOMETHYLENEPHOSPHINIC"), 9.963 +('x', True, "PIV", "PIVALIC"), 9.964 +('x', True, "PTA", "[(1-AMINO-3-METHYL-BUTYL)-HYDROXY-PHOSPHINOYL]-ACETIC"), 9.965 +('x', True, "PVL", "PYRUVOYL"), 9.966 +('x', True, "QLG", "[(4Z)-2-[(1Z)-4-AMINO-4-OXOBUTANIMIDOYL]-4-(2-METHYLPROPYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.967 +('x', True, "R2P", 
"(2R)-2-AMINO-3-(4-HYDROXY-1,2,5-THIADIAZOL-3-YL)PROPANOIC"), 9.968 +('x', True, "RC7", "{(2R)-4-(4-HYDROXYBENZYL)-2-[2-(1H-IMIDAZOL-4-YL)ETHYL]-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETALDEHYDE"), 9.969 +('x', True, "RON", "NORVALINE"), 9.970 +('x', True, "SD2", "N-(SULFANYLACETYL)TYROSYLPROLYLMETHIONINAMIDE"), 9.971 +('x', True, "SEM", "3-AMINO-4-OXYBENZYL-2-BUTANONE"), 9.972 +('x', True, "SIC", "(2R)-2-[(3S)-3-AMINO-2,5-DIOXOPYRROLIDIN-1-YL]-3-SULFANYLPROPANOIC"), 9.973 +('x', True, "SIN", "SUCCINIC"), 9.974 +('x', True, "SUB", "3-PHENYL-2-{[4-(TOLUENE-4-SULFONYL)-THIOMORPHOLINE-3-CARBONYL]-AMINO}-PROPIONIC"), 9.975 +('x', True, "SUI", "(3-AMINO-2,5-DIOXO-1-PYRROLIDINYL)ACETIC"), 9.976 +('x', True, "T66", "2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-THYMINE"), 9.977 +('x', True, "TA4", "(S)-2-[4-(AMINOMETHYL)-1H-1,2,3-TRIAZOL-1-YL]-4-METHYLPENTANOIC"), 9.978 +('x', True, "TCQ", "3-((3E)-4-(HYDROXYMETHYL)-6-OXO-3-{[(1S,2R)-2-PHENYLCYCLOPROPYL]IMINO}CYCLOHEXA-1,4-DIEN-1-YL)ALANINE"), 9.979 +('x', True, "TFA", "TRIFLUOROACETYL"), 9.980 +('x', True, "THO", "REDUCED"), 9.981 +('x', True, "TPH", "2-AMINO-3-PHENYL-PROPANE-1,1-DIOL"), 9.982 +('x', True, "TST", "4-METHYL-2-{[4-(TOLUENE-4-SULFONYL)-THIOMORPHOLINE-3-CARBONYL]-AMINO}-PENTANOIC"), 9.983 +('x', True, "TYX", "S-(2-ANILINO-2-OXOETHYL)-L-CYSTEINE"), 9.984 +('x', True, "TZB", "(4S)-2-[(1E)-1-AMINOPROP-1-ENYL]-4,5-DIHYDRO-1,3-THIAZOLE-4-CARBOXYLIC"), 9.985 +('x', True, "TZO", "1,3-THIAZOLE-4-CARBOXYLIC"), 9.986 +('x', True, "UN1", "2-AMINOHEXANEDIOIC"), 9.987 +('x', True, "UN2", "2-AMINO-4,4-DIHYDROXYBUTANOIC"), 9.988 +('x', True, "UNK", "UNKNOWN"), 9.989 +('x', True, "VDL", "(2R,3R)-2,3-DIAMINOBUTANOIC"), 9.990 +('x', True, "VLL", "(2S)-2,3-DIAMINOBUTANOIC"), 9.991 +('x', True, "VLM", "VALINYLAMINE"), 9.992 +('x', True, "VMS", "5'O-[N-(L-VALYL)SULPHAMOYL]ADENOSINE"), 9.993 +('x', True, "VOL", "L-VALINOL"), 9.994 +('x', True, "X9A", 
"amino[(3-{(2Z,5S,6S,9R,12S,13R,16R)-5,16-dicarboxy-2-ethylidene-12-[(1E,3E,5S,6R)-6-methoxy-3,5-dimethyl-7-phenylhepta-1,3-dien-1-yl]-1,6,13-trimethyl-3,7,10,14,19-pentaoxo-1,4,8,11,15-pentaazacyclononadecan-9-yl}propyl)amino]methaniminium"), 9.995 +('x', True, "X9Q", "{(2S)-2-[(1S)-1-AMINOETHYL]-4-BENZYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC"), 9.996 +('x', True, "XCP", "(1S,2S)-2-AMINOCYCLOPENTANECARBOXYLIC"), 9.997 +('x', True, "XPC", "(3S,4R)-4-AMINOPYRROLIDINE-3-CARBOXYLIC"), 9.998 +('x', True, "XXY", "2-[(1R,2R)-1-AMINO-2-HYDROXYPROPYL]-1-(CARBOXYMETHYL)-4-(1H-IMIDAZOL-5-YLMETHYL)-1H-IMIDAZOL-5-OLATE"), 9.999 +('x', True, "XYG", "[(4Z)-2-[(1Z)-ETHANIMIDOYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC"), 9.1000 +('x', True, "ZFB", "(3R)-3-{[(BENZYLOXY)CARBONYL]AMINO}-2-OXO-4-PHENYLBUTANE-1-DIAZONIUM"), 9.1001 +('y', True, "0A1", "O-METHYL-L-TYROSINE"), 9.1002 +('y', True, "1TY", "3-{(3E)-4-HYDROXY-6-OXO-3-[(2-PHENYLETHYL)IMINO]CYCLOHEXA-1,4-DIEN-1-YL}ALANINE"), 9.1003 +('y', True, "2TY", "2-HYDROXY-5-{[(1E)-2-PHENYLETHYLIDENE]AMINO}-L-TYROSINE"), 9.1004 +('y', True, "4BF", "4-BROMO-L-PHENYLALANINE"), 9.1005 +('y', True, "AZY", "3-AZIDO-L-TYROSINE"), 9.1006 +('y', True, "B3Y", "(3S)-3-AMINO-4-(4-HYDROXYPHENYL)BUTANOIC"), 9.1007 +('y', True, "DBY", "3,5"), 9.1008 +('y', True, "DPQ", "(S)-2-AMINO-3-(4-HYDROXY-3-OXOCYCLOHEXA-1,4-DIENYL)PROPANOIC"), 9.1009 +('y', True, "DTY", "D-TYROSINE"), 9.1010 +('y', True, "ESB", "3-[(3E)-3-(ETHYLIMINO)-4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIEN-1-YL]-L-ALANINE"), 9.1011 +('y', True, "FLT", "FLUOROMALONYL"), 9.1012 +('y', True, "FTY", "DEOXY-DIFLUOROMETHELENE-PHOSPHOTYROSINE"), 9.1013 +('y', True, "IYR", "3-IODO-TYROSINE"), 9.1014 +('y', True, "MBQ", "2-HYDROXY-5-({1-[(4-METHYLPHENOXY)METHYL]-3-OXOPROP-1-ENYL}AMINO)-L-TYROSINE"), 9.1015 +('y', True, "MTY", "META-TYROSINE"), 9.1016 +('y', True, "NBQ", "2-HYDROXY-5-({1-[(2-NAPHTHYLOXY)METHYL]-3-OXOPROP-1-ENYL}AMINO)TYROSINE"), 9.1017 +('y', True, "NIY", 
"META-NITRO-TYROSINE"), 9.1018 +('y', True, "NTY", "BETA"), 9.1019 +('y', True, "OTY", "2-AMINO-3-(4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIENYL)PROPANOIC"), 9.1020 +('y', True, "PAQ", "2-OXY-4-HYDROXY-5-(2-HYDRAZINOPYRIDINE)PHENYLALANINE"), 9.1021 +('y', True, "PTH", "CE1-METHYLENE-HYDROXY-PHOSPHOTYROSINE"), 9.1022 +('y', True, "PTM", "ALPHA-METHYL-O-PHOSPHOTYROSINE"), 9.1023 +('y', True, "PTR", "O-PHOSPHOTYROSINE"), 9.1024 +('y', True, "STY", "TYROSINE-O-SULPHONIC"), 9.1025 +('y', True, "TPQ", "5-(2-CARBOXY-2-AMINOETHYL)-2-HYDROXY-1,4-BENZOQUINONE"), 9.1026 +('y', True, "TTS", "3-((3E)-4-HYDROXY-3-{[2-(4-HYDROXYPHENYL)ETHYL]IMINO}-6-OXOCYCLOHEXA-1,4-DIEN-1-YL)ALANINE"), 9.1027 +('y', True, "TY2", "3-AMINO-L-TYROSINE"), 9.1028 +('y', True, "TY3", "3-HYDROXY-L-TYROSINE"), 9.1029 +('y', True, "TYB", "TYROSINAL"), 9.1030 +('y', True, "TYI", "3,5-DIIODOTYROSINE"), 9.1031 +('y', True, "TYN", "AMINOBENZOFURAZAN-O-TYROSINE"), 9.1032 +('y', True, "TYO", "(4Z,6E)-2-AMINO-7-HYDROPEROXY-4-[(E)-2-HYDROXYVINYL]HEPTA-4,6-DIENOIC"), 9.1033 +('y', True, "TYQ", "3-AMINO-6-HYDROXY-TYROSINE"), 9.1034 +('y', True, "TYS", "O-SULFO-L-TYROSINE"), 9.1035 +('y', True, "TYT", "TYROSINE"), 9.1036 +('y', True, "TYY", "3-(4-HYDROXY-3-IMINO-6-OXO-CYCLOHEXA-1,4-DIENYL)-ALANINE"), 9.1037 +('y', True, "YOF", "3-FLUOROTYROSINE"), 9.1038 +) 9.1039 + 9.1040 +rna = ( 9.1041 +('A', False, "A", "ADENOSINE-5'-MONOPHOSPHATE"), 9.1042 +('C', False, "C", "CYTIDINE-5'-MONOPHOSPHATE"), 9.1043 +('G', False, "G", "GUANOSINE-5'-MONOPHOSPHATE"), 9.1044 +('U', False, "U", "URIDINE-5'-MONOPHOSPHATE"), 9.1045 +('N', False, "", "Undefined RNA Nucelotide"), 9.1046 +('a', True, "12A", "2-METHYLTHIO-N6-(AMINOCARBONYL-L-THREONYL)-ADENOSINE-5'-MONOPHOSPHATE"), 9.1047 +('a', True, "1MA", "6-HYDRO-1-METHYLADENOSINE-5'-MONOPHOSPHATE"), 9.1048 +('a', True, "2MA", "2-METHYLADENOSINE-5'-MONOPHOSPHATE"), 9.1049 +('a', True, "5FA", "ADENOSINE-5'-PENTAPHOSPHATE"), 9.1050 +('a', True, "6IA", "N6-ISOPENTENYL-ADENOSINE-5'-MONOPHOSPHATE"), 
9.1051 +('a', True, "6MA", "N6-METHYL-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"), 9.1052 +('a', True, "6MC", "CIS-N6-METHYL-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"), 9.1053 +('a', True, "6MT", "TRANS-N6-METHYL-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"), 9.1054 +('a', True, "8AN", "3'-amino-3'-deoxyadenosine 5'-(dihydrogen phosphate)"), 9.1055 +('a', True, "A23", "ADENOSINE-5'-PHOSPHATE-2',3'-CYCLIC PHOSPHATE"), 9.1056 +('a', True, "A2L", "3'-O-METHYOXYETHYL-ADENOSINE 5'-MONOPHOSPHATE"), 9.1057 +('a', True, "A2M", "2'-O-METHYL-ADENOSINE-5'-MONOPHOSPHATE"), 9.1058 +('a', True, "A39", "2'-O-METHYL-ADENOSINE 5'-MONOPHOSPHATE"), 9.1059 +('a', True, "A3P", "ADENOSINE-3'-5'-DIPHOSPHATE"), 9.1060 +('a', True, "A44", "2'-O-METHYOXYETHYL-ADENOSINE 5'-MONOPHOSPHATE"), 9.1061 +('a', True, "AET", "N-[N-(9-B-D-RIBOFURANOSYLPURIN-6-YL)METHYLCARBAMOYL]THREONINE-5'-MONOPHOSPHATE"), 9.1062 +('a', True, "AVC", "ADENOSINE-5'-MONOPHOSPHATE-2',3'-VANADATE"), 9.1063 +('a', True, "LCA", "[(1R,3R,4R,7S)-7-HYDROXY-3-(ADENIN-9-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL"), 9.1064 +('a', True, "MA6", "6N-DIMETHYLADENOSINE-5'-MONOPHOSHATE"), 9.1065 +('a', True, "MAD", "6-HYDRO-1-METHYLADENOSINE-5'-MONOPHOSPHATE"), 9.1066 +('a', True, "MGQ", "7-BENZYL"), 9.1067 +('a', True, "MIA", "2-METHYLTHIO-N6-ISOPENTENYL-ADENOSINE-5'-MONOPHOSPHATE"), 9.1068 +('a', True, "MTU", "9-BETA-D-RIBOFURANOSYL-9H-PURIN-2-AMINE"), 9.1069 +('a', True, "P5P", "PURINE RIBOSIDE-5'-MONOPHOSPHATE"), 9.1070 +('a', True, "PPU", "PUROMYCIN-5'-MONOPHOSPHATE"), 9.1071 +('a', True, "PR5", "PURINE RIBOSIDE-5'-MONOPHOSPHATE"), 9.1072 +('a', True, "PU", "PUROMYCIN-N-AMINOPHOSPHONIC"), 9.1073 +('a', True, "RIA", "2'-O-[(5'-PHOSPHO)RIBOSYL]ADENOSINE-5'-MONOPHOSPHATE"), 9.1074 +('a', True, "SRA", "ADENOSINE -5'-THIO-MONOPHOSPHATE"), 9.1075 +('a', True, "T6A", "N-[N-(9-B-D-RIBOFURANOSYLPURIN-6-YL)CARBAMOYL]THREONINE-5'-MONOPHOSPHATE"), 9.1076 +('a', True, "ZAD", "(S)-1'-(2',3'-DIHYDROXYPROPYL)-ADENINE"), 9.1077 +('c', True, "10C", 
"4-AMINO-1-{2,5-ANHYDRO-4-[(PHOSPHONOOXY)METHYL]-ALPHA-L-LYXOFURANOSYL}PYRIMIDIN-2(1H)-ONE"), 9.1078 +('c', True, "1SC", "5'-O-THIOPHOSPHONOCYTIDINE"), 9.1079 +('c', True, "4OC", "4N,O2'-METHYLCYTIDINE-5'-MONOPHOSPHATE"), 9.1080 +('c', True, "5IC", "5-IODO-CYTIDINE-5'-MONOPHOSPHATE"), 9.1081 +('c', True, "5MC", "5-METHYLCYTIDINE-5'-MONOPHOSPHATE"), 9.1082 +('c', True, "A5M", "2'-AMINE-CYTIDINE-5'-MONOPHOSPHATE"), 9.1083 +('c', True, "C25", "CYTIDINE-5'-MONOPHOSPHATE"), 9.1084 +('c', True, "C2L", "5-METHYL-3'-O-METHOXYETHYL CYTIDINE 5'-MONOPHOSPHATE"), 9.1085 +('c', True, "C31", "2'-O-3-AMINOPROPYL CYTIDINE-5'-MONOPHOSPHATE"), 9.1086 +('c', True, "C43", "2'-O-METHYOXYETHYL-CYTIDINE-5'-MONOPHOSPHATE"), 9.1087 +('c', True, "CBV", "5-BROMOCYTIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 9.1088 +('c', True, "CCC", "CYTIDINE-5'-PHOSPHATE-2',3'-CYCLIC PHOSPHATE"), 9.1089 +('c', True, "CH", "N3-PROTONATED CYTIDINE-5'-MONOPHOSPHATE"), 9.1090 +('c', True, "CSF", "CYTIDINE-5'-MONOPHOSPHATE-3-FLUORO-N-ACETYL-NEURAMINIC ACID"), 9.1091 +('c', True, "IC", "ISOCYTIDINE-5'-MONOPHOSPHATE"), 9.1092 +('c', True, "LC", "L-CYTIDINE-5'-MONOPHOSPHATE"), 9.1093 +('c', True, "M5M", "2'-(N-ACETAMIDE)-CYTIDINE-5'-MONOPHOSPHATE"), 9.1094 +('c', True, "N5M", "5-nitrocytidine 5'-(dihydrogen phosphate)"), 9.1095 +('c', True, "OMC", "O2'-METHYLYCYTIDINE-5'-MONOPHOSPHATE"), 9.1096 +('c', True, "PMT", "PHOSPHORIC"), 9.1097 +('c', True, "S4C", "4'-THIO-4'-DEOXY-CYTOSINE-5'-MONOPHOSPHATE"), 9.1098 +('c', True, "ZCY", "(S)-1'-(2',3'-DIHYDROXYPROPYL)-CYTOSINE"), 9.1099 +('g', True, "1MG", "1N-METHYLGUANOSINE-5'-MONOPHOSPHATE"), 9.1100 +('g', True, "2MG", "2N-METHYLGUANOSINE-5'-MONOPHOSPHATE"), 9.1101 +('g', True, "7MG", "7N-METHYL-8-HYDROGUANOSINE-5'-MONOPHOSPHATE"), 9.1102 +('g', True, "CG1", "5'-O-[(R)-HYDROXY(METHOXY)PHOSPHORYL]GUANOSINE"), 9.1103 +('g', True, "G25", "GUANOSINE-5'-MONOPHOSPHATE"), 9.1104 +('g', True, "G2L", "3'-O-METHYOXYETHYL-GUANOSINE-5'-MONOPHOSPHATE"), 9.1105 +('g', True, "G46", 
"2'-DEOXY-GUANOSINE-5'-MONOTHIOPHOSPHATE"), 9.1106 +('g', True, "G48", "2'-O-METHYOXYETHYL-GUANOSINE-5'-MONOPHOSPHATE"), 9.1107 +('g', True, "G7M", "N7-METHYL-GUANOSINE-5'-MONOPHOSPHATE"), 9.1108 +('g', True, "GAO", "GUANINE ARABINOSE-5'-PHOSPHATE"), 9.1109 +('g', True, "GDP", "GUANOSINE-5'-DIPHOSPHATE"), 9.1110 +('g', True, "GH3", "3'-DEOXY-GUANOSINE-5'-TRIPHOSPHATE"), 9.1111 +('g', True, "GOM", "GLUTAMOL-AMP"), 9.1112 +('g', True, "GTP", "GUANOSINE-5'-TRIPHOSPHATE"), 9.1113 +('g', True, "IG", "ISOGUANOSINE-5'-MONOPHOSPHATE"), 9.1114 +('g', True, "KAG", "2'-DEOXY-N-[(1S)-1-METHYL-3-OXOPROPYL]GUANOSINE 5'-PHOSPHATE"), 9.1115 +('g', True, "LG", "L-GUANOSINE-5'-MONOPHOSPHATE"), 9.1116 +('g', True, "M2G", "N2-DIMETHYLGUANOSINE-5'-MONOPHOSPHATE"), 9.1117 +('g', True, "MGV", "P-FLUORO-7-BENZYL"), 9.1118 +('g', True, "N6G", "((2R,3S,4R,5S)-5-(2,6-DIAMINO-9H-PURIN-9-YL)-3,4-DIHYDROXY-TETRAHYDROFURAN-2-YL)METHYL"), 9.1119 +('g', True, "OMG", "O2'-METHYLGUANOSINE-5'-MONOPHOSPHATE"), 9.1120 +('g', True, "PGP", "GUANOSINE-3',5'-DIPHOSPHATE"), 9.1121 +('g', True, "QUO", "2-AMINO-7-DEAZA-(2'',3''-DIHYDROXY-CYCLOPENTYLAMINO)-GUANOSINE-5'-MONOPHOSPHATE"), 9.1122 +('g', True, "TPG", "2,2,7-TRIMETHYL-GUANOSINE-5'-TRIPHOSPHATE-5'-GUANOSINE"), 9.1123 +('g', True, "XTS", "9-[(2R,3R,4S,5R)-3,4-DIHYDROXY-5-(HYDROXYMETHYL)OXOLAN-2-YL]-3H-PURINE-2,6-DIONE"), 9.1124 +('g', True, "YG", "WYBUTOSINE"), 9.1125 +('g', True, "YYG", "4-(3-[5-O-PHOSPHONORIBOFURANOSYL]-4,6-DIMETHYL-8-OXO-4,8-DIHYDRO-3H-1,3,4,5,7A-PENTAAZA-S-INDACEN-YLAMINO-BUTYRIC)"), 9.1126 +('g', True, "ZGU", "(S)-1'-(2',3'-DIHYDROXYPROPYL)-GUANINE"), 9.1127 +('n', True, "128", "SPIRO(2,4,6-TRINITROBENZENE[1,2A]-2O',3O'-METHYLENE-ADENINE-TRIPHOSPHATE"), 9.1128 +('n', True, "1PR", "PHOSPHORIC"), 9.1129 +('n', True, "2SA", "2-[9-(3,4-DIHYDROXY-5-PHOSPHONOOXYMETHYL-TETRAHYDRO-FURAN-2-YL)-9H-PURIN-6-YLAMINO]-SUCCINIC"), 9.1130 +('n', True, "4AC", "N(4)-ACETYLCYTIDINE-5'-MONOPHOSPHATE"), 9.1131 +('n', True, "4TA", 
"P1-(5'-ADENOSYL)P4-(5'-(2'-DEOXY-THYMIDYL))TETRAPHOSPHATE"), 9.1132 +('n', True, "6MZ", "N6-METHYLADENOSINE-5'-MONOPHOSPHATE"), 9.1133 +('n', True, "ADX", "ADENOSINE-5'-PHOSPHOSULFATE"), 9.1134 +('n', True, "BMP", "6-HYDROXYURIDINE-5'-PHOSPHATE"), 9.1135 +('n', True, "BT5", "BIOTINYL-5-AMP"), 9.1136 +('n', True, "CM0", "5-(CARBOXYMETHOXY) URIDINE-5'-MONOPHOSPHATE"), 9.1137 +('n', True, "CS8", "3-THIAOCTANOYL-COENZYME"), 9.1138 +('n', True, "DBM", "9-(6-DEOXY-BETA-D-ALLOFURANOSYL)-6-METHYLPURINE"), 9.1139 +('n', True, "ENA", "ETHENO-NAD"), 9.1140 +('n', True, "ENP", "ETHENO-NADP"), 9.1141 +('n', True, "FA5", "ADENOSINE-5'-[PHENYLALANINYL-PHOSPHATE]"), 9.1142 +('n', True, "FAI", "5-(FORMYLAMINO)-1-(5-O-PHOSPHONO-BETA-D-RIBOFURANOSYL)-1H-IMIDAZOLE-4-CARBOXAMIDE"), 9.1143 +('n', True, "FMU", "5-FLUORO-5-METHYLURIDINE-5'-MONOPHOSPHATE"), 9.1144 +('n', True, "G4P", "GUANOSINE-5',3'-TETRAPHOSPHATE"), 9.1145 +('n', True, "I", "INOSINIC"), 9.1146 +('n', True, "IRN", "1-(5-O-PHOSPHONO-BETA-D-RIBOFURANOSYL)-1H-IMIDAZOLE"), 9.1147 +('n', True, "JW5", "6-(HYDROXYMETHYL)URIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 9.1148 +('n', True, "LMS", "[(2R,3S,4R,5R)-5-(6-AMINO-9H-PURIN-9-YL)-3,4-DIHYDROXYTETRAHYDRO-2-FURANYL]METHYL"), 9.1149 +('n', True, "N", "ANY 5'-MONOPHOSPHATE NUCLEOTIDE"), 9.1150 +('n', True, "NF2", "(1S)-1,4-ANHYDRO-1-(2,4-DIFLUORO-5-METHYLPHENYL)-5-O-PHOSPHONO-D-RIBITOL"), 9.1151 +('n', True, "OAD", "2'-O-ACETYL ADENOSINE-5-DIPHOSPHORIBOSE"), 9.1152 +('n', True, "ODP", "4-OXO-NICOTINAMIDE-ADENINE"), 9.1153 +('n', True, "P1P", "3-{[OXIDO(OXO)PHOSPHINO]OXY}PROPAN-1-OLATE"), 9.1154 +('n', True, "PQ1", "PHOSPHORIC"), 9.1155 +('n', True, "PUY", "PUROMYCIN"), 9.1156 +('n', True, "PYY", "D-RIBOFURANOSYL-BENZENE-5'-MONOPHOSPHATE"), 9.1157 +('n', True, "RT", "RIBOSYLTHYMINE-5'-MONOPHOSPHATE"), 9.1158 +('n', True, "RTP", "RIBAVIRIN"), 9.1159 +('n', True, "SMT", "2'-[(METHYLTHIO)ETHYLOXY]-THYMIDINE-5'-MONOPHOSPHATE"), 9.1160 +('n', True, "T23", 
"2'-O-METHYL-3'-METHYL-3'-DEOXY-ARABINOFURANOSYL-THYMINE-5'-PHOSPHATE"), 9.1161 +('n', True, "T2S", "THYMIDINE-5'-DITHIOPHOSPHORATE"), 9.1162 +('n', True, "T38", "2'-O-ETHOXYMETHYLENE THYMIDINE 5'-MONOPHOSPHATE"), 9.1163 +('n', True, "T39", "2'-O-METHOXYETHYLENE THYMIDINE 5'-MONOPHOSPHATE"), 9.1164 +('n', True, "T41", "1-(2-O-METHYL-BETA-D-ARABINOFURANOSYL)THYMIDINE 5'-MONOPHOSPHATE"), 9.1165 +('n', True, "TAL", "9-(6-DEOXY-ALPHA-L-TALOFURANOSYL)-6-METHYLPURINE"), 9.1166 +('n', True, "TLB", "2'-O,3'-C-METHYLENE-ARABINOFURANOSYL-THYMINE-5'-MONOPHOSPHATE"), 9.1167 +('n', True, "TYU", "TETRAHYDROURIDINE"), 9.1168 +('n', True, "UDP", "URIDINE-5'-DIPHOSPHATE"), 9.1169 +('n', True, "ZHP", "(S)-1'-(2',3'-DIHYDROXYPROPYL)-HYDROXYPYRIDONE"), 9.1170 +('n', True, "ZTH", "(S)-1'-(2',3'-DIHYDROXYPROPYL)-THYMINE"), 9.1171 +('u', True, "125", "2'-O-FLUOROETHYL-5-METHYL-URIDINE-5'-MONOPHOSPHATE"), 9.1172 +('u', True, "126", "2'-O-METHYL-[TRI(OXYETHYL)]-5-METHYL-URIDINE-5'-MONOPHOSPHATE"), 9.1173 +('u', True, "127", "2'-O-AMINOOXY-ETHYL-5-METHYL-URIDINE-5'-MONOPHOSPHATE"), 9.1174 +('u', True, "2AU", "2'-AMINOURIDINE"), 9.1175 +('u', True, "2MU", "2',5-DIMETHYLURIDINE-5'-MONOPHOSPHATE"), 9.1176 +('u', True, "4SU", "4-THIOURIDINE-5'-MONOPHOSPHATE"), 9.1177 +('u', True, "5BU", "5-BROMO-URIDINE-5'-MONOPHOSPHATE"), 9.1178 +('u', True, "5FU", "5-FLUORO-URIDINE-5'-MONOPHOSPHATE"), 9.1179 +('u', True, "5MU", "5-METHYLURIDINE 5'-MONOPHOSPHATE"), 9.1180 +('u', True, "70U", "5-(O-METHYLACETO)-2-THIO-2-DEOXY-URIDINE-5'-MONOPHOSPHATE"), 9.1181 +('u', True, "CNU", "5-CYANO-URIDINE-5'-MONOPHOSPHATE"), 9.1182 +('u', True, "DHU", "5,6-DIHYDROURIDINE-5'-PHOSPHATE"), 9.1183 +('u', True, "FHU", "(5S,6R)-5-FLUORO-6-HYDROXY-PSEUDOURIDINE-5'-MONOPHOSPHATE"), 9.1184 +('u', True, "H2U", "5,6-DIHYDROURIDINE-5'-MONOPHOSPHATE"), 9.1185 +('u', True, "IU", "5-IODOURIDINE-5'-MONOPHOSPHATE"), 9.1186 +('u', True, "LHU", "L-URIDINE-5'-MONOPHOSPHATE"), 9.1187 +('u', True, "MEP", "PHOSPHORIC"), 9.1188 +('u', True, 
"MNU", "(2R,4S)-1-[(4R)-3,4-DIHYDROXYTETRAHYDROFURAN-2-YL]-5-[(METHYLAMINO)METHYL]-1,2,3,4-TETRAHYDROPYRIMIDINE-2,4-DIOL-5'-MONOPHOSPHATE"), 9.1189 +('u', True, "OMU", "O2'-METHYLURIDINE 5'-MONOPHOSPHATE"), 9.1190 +('u', True, "ONE", "1-(BETA-D-RIBOFURANOSYL)-PYRIDIN-4-ONE-5'-PHOSPHATE"), 9.1191 +('u', True, "PSU", "PSEUDOURIDINE-5'-MONOPHOSPHATE"), 9.1192 +('u', True, "PYO", "1-(BETA-D-RIBOFURANOSYL)-PYRIMIDIN-2-ONE-5'-PHOSPHATE"), 9.1193 +('u', True, "S4U", "4-THIOURIDINE-5'-PHOSPHATE"), 9.1194 +('u', True, "SSU", "URIDINE-5'-PHOSPHOROTHIOATE"), 9.1195 +('u', True, "SUR", "1-(BETA-D-RIBOFURANOSYL)-2-THIO-URACIL-5'-PHOSPHATE"), 9.1196 +('u', True, "T31", "THYMIDINE 5'-MONOTHIOPHOSPHATE"), 9.1197 +('u', True, "U25", "URIDINE-5'-MONOPHOSPHATE"), 9.1198 +('u', True, "U2L", "5-METHYL-3'-O-METHOXYETHYL URIDINE-5'-MONOPHOSPHATE"), 9.1199 +('u', True, "U2P", "PHOSPHORIC"), 9.1200 +('u', True, "U31", "2'-O-3-AMINOPROPYL 2'-DEOXYURIDINE-5'-MONOPHOSPHATE"), 9.1201 +('u', True, "U34", "URIDINE 5'-MONOMETHYLPHOSPHATE"), 9.1202 +('u', True, "U36", "2'-O-METHYOXYETHYL-URIDINE-5'-MONOPHOSPHATE"), 9.1203 +('u', True, "U37", "URIDINE 5'-MONOTHIOPHOSPHATE"), 9.1204 +('u', True, "U8U", "5-METHYLAMINOMETHYL-2-THIOURIDINE-5'-MONOPHOSPHATE"), 9.1205 +('u', True, "UAR", "URACIL ARABINOSE-5'-PHOSPHATE"), 9.1206 +('u', True, "UD5", "5-FLUOROURIDINE"), 9.1207 +('u', True, "UMP", "2'-DEOXYURIDINE 5'-MONOPHOSPHATE"), 9.1208 +('u', True, "UR3", "3-METHYLURIDINE-5'-MONOPHOSHATE"), 9.1209 +('u', True, "URD", "1-((2R,3R,4S,5R)-TETRAHYDRO-3,4-DIHYDROXY-5-(HYDROXYMETHYL)FURAN-2-YL)PYRIDINE-2,4(1H,3H)-DIONE"), 9.1210 +) 9.1211 + 9.1212 +# vim: set et ts=4 sts=4 sw=4:
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/allpy/data/codes_template.txt Tue Jan 25 16:03:00 2011 +0300 10.3 @@ -0,0 +1,17 @@ 10.4 +"""Tables of monomer codes. 10.5 + 10.6 +`dna`, `rna`, `protein` are lists of all known codes for monomers of given 10.7 +type. Each of them is a list of tuples of kind: 10.8 + 10.9 + ( 1-letter code, is-modified?, 3-letter-code, fullname ) 10.10 + 10.11 +`3-letter-code` is the code used in PDB (it may actually be one or 10.12 +two letters) 10.13 + 10.14 +""" 10.15 + 10.16 +protein = %(protein)s 10.17 +dna = %(dna)s 10.18 +rna = %(rna)s 10.19 + 10.20 +# vim: set et ts=4 sts=4 sw=4:
# File: allpy/data/mkcodes.py
"""Components.cif to codes.py converter.

Reads the PDB chemical component dictionary (``components.cif``), collects
every peptide / DNA / RNA component and renders the three tables into
``codes.py`` through a ``%``-style template.

Each table row is a tuple::

    (1-letter code, is-modified?, 3-letter code, full name)
"""
import argparse
import os
import sys
from pprint import pformat

# Standard residues: PDB code -> one-letter code.  A component whose
# three-letter code is NOT listed here is reported as "modified".
AAbank = {'ALA':'A', 'ARG':'R', 'ASN':'N', 'ASP':'D', 'CYS':'C',
          'GLN':'Q', 'GLU':'E', 'GLY':'G', 'HIS':'H', 'ILE':'I',
          'LEU':'L', 'LYS':'K', 'MET':'M', 'PHE':'F', 'PRO':'P',
          'SER':'S', 'THR':'T', 'TRP':'W', 'TYR':'Y', 'VAL':'V',
          'DA' :'A', 'DT' :'T', 'DG' :'G', 'DC' :'C',
          'A': 'A', 'U' :'U', 'G' :'G', 'C' :'C'}


def rel(*x):
    """Return a path built from *x*, relative to this script's directory."""
    return os.path.join(os.path.abspath(os.path.dirname(__file__)), *x)


def _strip_quotes(value):
    """Drop one pair of surrounding double quotes, if present."""
    if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
        return value[1:-1]
    return value


def _entry_to_code(entry):
    """Convert one collected CIF section to ``(table name, row)``.

    Returns None when the section is not a peptide/DNA/RNA component.
    Raises KeyError when a required ``_chem_comp.*`` field is absent.
    """
    monomer_type = entry['_chem_comp.type'].strip()
    if "PEPTIDE" in monomer_type:
        kind = 'protein'
    elif "DNA" in monomer_type:
        kind = 'dna'
    elif "RNA" in monomer_type:
        kind = 'rna'
    else:
        return None

    code1 = entry['_chem_comp.one_letter_code'].strip().lower()
    if code1 == '?':
        # No own one-letter code: inherit it from the standard parent
        # residue, or fall back to the "unknown" letter of the table.
        parent = entry['_chem_comp.mon_nstd_parent_comp_id'].strip().upper()
        if parent in AAbank:
            code1 = AAbank[parent].lower()
        elif kind == 'protein':
            # Decided by the table *name*: the previous list comparison
            # (``container == protein``) was true for any two equal lists,
            # e.g. while both tables were still empty.
            code1 = 'x'
        else:
            code1 = 'n'

    code3 = entry['_chem_comp.three_letter_code'].strip().upper()
    name = _strip_quotes(entry['_chem_comp.name'].strip())
    modified = code3 not in AAbank
    return kind, (code1, modified, code3, name)


def parse_components(lines):
    """Build the monomer tables from the lines of a components.cif file.

    *lines* is any iterable of text lines (an open file works).  Lines of
    the form ``key value`` are collected until a lone ``#`` closes the
    section; the section is then converted with `_entry_to_code`.

    Returns a dict ``{'protein': [...], 'dna': [...], 'rna': [...]}`` of
    sorted row lists, each including the "Undefined ..." placeholder row.

    Sections that lack a required field are skipped.  The collected fields
    are discarded after *every* ``#`` so that nothing leaks from one
    component into the next one (previously a skipped component could
    donate e.g. its name to a later component that lacked that field).
    """
    tables = {'protein': [], 'dna': [], 'rna': []}
    entry = {}
    for line in lines:
        line = line.strip()
        if line == '#':
            if entry:
                try:
                    parsed = _entry_to_code(entry)
                except KeyError:
                    # Not a complete _chem_comp section (atom/bond loops
                    # end with '#' too): nothing to record.
                    parsed = None
                if parsed is not None:
                    kind, row = parsed
                    tables[kind].append(row)
            entry = {}  # always start the next section from scratch
            continue
        key_value = line.split(' ', 1)
        if len(key_value) == 2:
            key, value = key_value
            entry[key] = value

    tables['protein'].append(('X', False, "", "Undefined Aminoacid"))
    tables['dna'].append(('N', False, "", "Undefined DNA Nucelotide"))
    tables['rna'].append(('N', False, "", "Undefined RNA Nucelotide"))
    for table in tables.values():
        table.sort()
    return tables


def _build_parser():
    """Create the command-line parser (-i input, -o output, -t template)."""
    p = argparse.ArgumentParser(
        description='Components.cif to codes.py converter',
        epilog='',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    r = argparse.FileType('r')
    w = argparse.FileType('w')
    p.add_argument('-v','--version',action='version',version='%(prog)s 1.0')
    p.add_argument('-i',help='input components.cif',metavar='FILE',type=r,
        required=True)
    p.add_argument('-o',help='output codes.py',metavar='FILE',type=w,
        default=rel('codes.py'))
    p.add_argument('-t',help='Template for codes.py',metavar='FILE',type=r,
        default=rel('codes_template.txt'))
    return p


def main(argv=None):
    """Run the converter; *argv* defaults to ``sys.argv[1:]``.

    argparse reports bad arguments itself (usage message + exit status 2),
    so no extra exception handling is wrapped around ``parse_args``.
    """
    args = _build_parser().parse_args(argv)
    try:
        tables = parse_components(args.i)
        template = args.t.read()
        args.o.write(template % {'protein': pformat(tables['protein'], width=1024),
                                 'dna': pformat(tables['dna'], width=1024),
                                 'rna': pformat(tables['rna'], width=1024)})
    finally:
        # Close explicitly so the generated file is flushed to disk.
        for handle in (args.i, args.o, args.t):
            handle.close()


if __name__ == '__main__':
    sys.exit(main())
# File: allpy/dna.py
"""DNA flavours of the generic allpy classes."""
import base
import data.codes

class Monomer(base.Monomer):
    """DNA monomers: nucleotides."""
    type = 'dna'
    # Per-class lookup tables; presumably filled by base.Monomer._initialize
    # (called right below the class) -- confirm against base.py.
    by_code1 = {}
    by_code3 = {}
    by_name = {}
Monomer._initialize(data.codes.dna)

class Sequence(base.Sequence):
    """Sequence whose monomers are DNA `Monomer` objects."""
    monomer_type = Monomer

class Alignment(base.Alignment):
    """Alignment whose rows are DNA `Sequence` objects."""
    sequence_type = Sequence

class Block(Alignment, base.Block):
    """Block of a DNA alignment; adds nothing to its two bases."""
    pass

# vim: set ts=4 sts=4 sw=4 et:
# File: allpy/fasta.py
"""Reading and writing of FASTA files."""

# Characters removed from sequence bodies while parsing.
_BODY_WHITESPACE = " \n\r\t\v"


def parse_file(file):
    """Parse fasta file, remove spaces and newlines from sequence bodies.

    *file* is an open file-like object; it is read to the end.

    Return a list of tuples (name, description, sequence_body).  The name
    is the header text up to the first space, the description is the rest
    of the header.  Empty input (or blank text before the first ``>``)
    yields no record.
    """
    sequences = []
    for part in file.read().split("\n>"):
        if not part.strip(">" + _BODY_WHITESPACE):
            # Nothing but whitespace / a lone '>': not a record.
            continue
        header, _, body = part.partition("\n")
        # Only the very first record still carries its leading '>'.
        header = header.lstrip(">").strip()
        name, _, description = header.partition(" ")
        for char in _BODY_WHITESPACE:
            body = body.replace(char, "")
        sequences.append((name.strip(), description.strip(), body))
    return sequences


def save_file(out_file, string, name, description='', long_line=70):
    """Save given string to out_file in fasta format.

    The header line is ``>name`` or ``>name description``.

    Splits long lines to substrings of length=long_line.
    To prevent this, set long_line=None (or 0).

    Lines are written without trailing whitespace, and no empty line is
    emitted when len(string) is an exact multiple of long_line.
    """
    if description:
        name += " " + description
    out_file.write(">%s\n" % name)
    if long_line and long_line > 0:
        for start in range(0, len(string), long_line):
            out_file.write("%s\n" % string[start:start + long_line])
    else:
        out_file.write("%s\n" % string)


def determine_long_line(in_file):
    """Guess the line width used to wrap sequences in a fasta file.

    Returns the length of the first body line of the first record whose
    body spans at least two non-empty lines; such a first line must be a
    full-width one.  Returns 70 when no record is wrapped.

    *in_file* is read to the end.
    """
    records = in_file.read().split('>')
    for record in records[1:]:
        body_lines = [line.strip() for line in record.split('\n')[1:]]
        # Ignore blank lines (notably the empty string that follows the
        # final newline), otherwise every record looks multi-line.
        body_lines = [line for line in body_lines if line]
        if len(body_lines) >= 2:
            return len(body_lines[0])
    return 70

# vim: set ts=4 sts=4 sw=4 et:
# File: allpy/graph.py
# -*- coding: utf-8 -*-
"""Undirected weighted graph with clique search."""

from datetime import datetime, timedelta
from copy import copy

class TimeoutError(Exception):
    """Raised by Graph.bron_kerbosh when its time budget is exhausted."""
    pass



class Graph(object):
    """ Undirected weighted graph

    Data:
    nodes -- set of elements
    lines -- {line: cost}.
        line is frozenset([e1, e2])
        cost is float in (0, 1] or 1 (if all lines are equal)

    >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1})
    >>> g.fast_cliques()
    Fast algorithm started
    [frozenset([1, 2]), frozenset([3])]
    >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1, frozenset([1,1]): 1})
    >>> g.fast_cliques()
    Fast algorithm started
    [frozenset([1, 2]), frozenset([3])]
    >>> g = Graph(set([1,2,3,4]), {frozenset([1,2]): 0.98, frozenset([1,3]): 0.98,
    ... frozenset([2,3]): 0.1, frozenset([1,1]): 1})
    >>> g.fast_cliques()
    Fast algorithm started
    [frozenset([1, 2, 3]), frozenset([4])]
    >>> g.bron_kerbosh()
    Bron and Kerbosh algorithm started
    [frozenset([1, 2, 3]), frozenset([4])]
    >>> g.cliques()
    Bron and Kerbosh algorithm started
    [frozenset([1, 2, 3])]
    """

    def __init__(self, nodes=None, lines=None):
        """Copy *nodes*; keep only lines that join two distinct known nodes."""
        if not nodes:
            nodes = set()
        if not lines:
            lines = dict()
        self.nodes = set(nodes) # copy
        self.lines = {}
        for line, cost in lines.items():
            # len(line) != 2 means a self-loop such as frozenset([1, 1])
            if len(line) == 2 and line.issubset(self.nodes):
                self.lines[line] = cost

    @staticmethod
    def line(k1, k2):
        """ Construct object, representing line of graph """
        return frozenset([k1, k2])

    def bounded(self, k1, k2):
        """ Return if these two nodes of the graph are bounded with line

        A node is always considered bounded with itself.
        """
        return k1 == k2 or Graph.line(k1, k2) in self.lines

    def count_one(self, node):
        """ Returns number of connections of this node """
        # -1 compensates for bounded(node, node) being True
        return len([node1 for node1 in self.nodes if self.bounded(node, node1)]) - 1

    def cost_one(self, node):
        """ Returns sum of costs of all connections of this node """
        return sum([self.lines.get(Graph.line(node, node1), 0)
            for node1 in self.nodes if node != node1])

    def count_all(self):
        """ Returns {node: number of connections of this node} """
        c = dict([(node, 0) for node in self.nodes])
        for line in self.lines:
            for node in line:
                c[node] += 1
        return c


    def drop_node(self, node):
        """ Remove node and all involved lines """
        for node1 in self.nodes:
            self.lines.pop(Graph.line(node, node1), None)
        self.nodes.discard(node)

    def add_node(self, node, parent_graph):
        """ Add node and corresponding lines from parent_graph

        Added lines should be contained in self graph
        (takes care of hanging lines)
        """
        self.nodes.add(node)
        for node1 in self.nodes:
            line = Graph.line(node, node1)
            if line in parent_graph.lines:
                self.lines[line] = parent_graph.lines[line]

    def drop_nodes(self, nodes):
        """ Run drop_node for each of given nodes

        Returns if nodes was not empty (ugly beauty)
        """
        for node in nodes:
            self.drop_node(node)
        return bool(nodes)

    def drop_if_count(self, minsize):
        """ Run drop_node for each node, that has less than minsize lines

        Repeats until no such node is left (dropping a node lowers the
        counts of its neighbours).
        """
        while True:
            if not self.drop_nodes([node for (node, count)
                    in self.count_all().items() if count < minsize]):
                break

    def bron_kerbosh(self, timeout=-1, minsize=1):
        """ Bron and Kerboch algorithm implementation

        returns list of cliques
        clique is frozenset
        only cliques of at least minsize nodes are reported
        if timeout=-1, it means infinity
        if timeout has happened, raises TimeoutError

        The recursion of the textbook algorithm is unrolled into an
        explicit stack (list_candidates / list_used indexed by depth).

        NOTE(review): timeout is passed positionally to timedelta(), which
        interprets it as a number of *days*; if seconds were intended the
        call should be timedelta(seconds=timeout) -- confirm with callers.

        lava flow
        """
        print('Bron and Kerbosh algorithm started')
        cliques = []

        depth = 0
        list_candidates = [copy(self.nodes)]
        list_used = [set()]
        compsub = []

        start_time = datetime.now()
        timeout_timedelta = timedelta(timeout)

        while True: # WHILE...
            if depth == -1:
                break # DONE: every recursion level (iteration) is finished
            candidates = copy(list_candidates[depth])
            used = copy(list_used[depth])
            if not candidates: # WHILE candidates is NOT empty
                depth -= 1
                if compsub:
                    compsub.pop()
                continue

            # AND used does NOT contain a node CONNECTED TO ALL nodes of candidates
            # (every node of used is NOT connected to at least one candidate)
            used_candidates = False

            for used1 in used:
                for candidates1 in candidates:
                    if not self.bounded(used1, candidates1):
                        break
                else:
                    used_candidates = True

            if used_candidates:
                depth -= 1

                if compsub:
                    compsub.pop()
                continue

            # Pick a node v from candidates and add it to compsub
            v = candidates.pop()
            candidates.add(v)
            compsub.append(v)
            # Build new_candidates and new_used by removing from candidates
            # and used the nodes NOT connected to v
            # (that is, keeping only the nodes connected to v)
            new_candidates = set()
            for candidates1 in candidates:
                if self.bounded(candidates1, v) and candidates1 != v:
                    new_candidates.add(candidates1)

            new_used = set()
            for used1 in used:
                if self.bounded(used1, v) and used1 != v:
                    new_used.add(used1)

            # Remove v from candidates and put it into used
            list_candidates[depth].remove(v)
            list_used[depth].add(v)
            # IF new_candidates and new_used are both empty
            if not new_candidates and not new_used:
                # compsub is a clique
                if len(compsub) >= minsize:
                    cliques.append(frozenset(compsub))
            else:
                # ELSE "recursively call" bron_kerbosh(new_candidates, new_used)
                depth += 1

                # TIMEOUT check start
                if timeout != -1:
                    if datetime.now() - start_time > timeout_timedelta:
                        raise TimeoutError
                # TIMEOUT check end

                if depth >= len(list_candidates):
                    list_candidates.append(set())
                    list_used.append(set())

                list_candidates[depth] = copy(new_candidates)
                list_used[depth] = copy(new_used)

                continue

            # Remove v from compsub
            if compsub:
                compsub.pop()

        return cliques


    def fast_cliques(self, minsize=1):
        """ returns list of cliques

        clique is frozenset
        only groups of at least minsize nodes are returned

        Greedy heuristic.  Repeatedly: take the nodes not covered by any
        group found so far, drop the least connected (then cheapest) node
        until all remaining nodes have the same non-zero degree or a single
        node is left, then grow the group again with nodes of the full
        graph that are connected to every node of the group.
        """
        print('Fast algorithm started')
        # Every group found so far, including ones smaller than minsize:
        # they must still be excluded from later rounds to guarantee progress.
        found = []

        while True:
            graph = Graph(self.nodes, self.lines)
            for clique in found:
                graph.drop_nodes(clique)
            if not graph.nodes:
                break

            while True:
                # drop nodes, while its is possible
                if len(graph.nodes) == 1:
                    break
                c = graph.count_all()
                min_count = min(c.values())
                bad_nodes = [node for (node, count) in c.items() if count == min_count]
                if len(bad_nodes) == len(graph.nodes) and min_count != 0:
                    break

                costs = dict([(node, graph.cost_one(node)) for node in bad_nodes])
                min_cost = min(costs.values())
                for node, cost in costs.items():
                    if cost == min_cost:
                        graph.drop_node(node)
                        break

            while True:
                # add nodes, while its is possible
                candidats = {}
                for node in self.nodes:
                    if node in graph.nodes:
                        # Already a member.  Without this check (and with
                        # the former comparison against len(self.nodes))
                        # the loop re-added members forever whenever the
                        # whole graph was a single clique.
                        continue
                    c = len([i for i in graph.nodes if self.bounded(node, i)])
                    if c == len(graph.nodes):
                        # cost the node would have inside the group
                        candidats[node] = sum([
                            self.lines.get(Graph.line(node, i), 0)
                            for i in graph.nodes])
                if not candidats:
                    break

                max_cost = max(candidats.values())
                node = [node for (node, cost) in candidats.items() if cost == max_cost][0]
                graph.add_node(node, self)

            found.append(frozenset(graph.nodes))

        return [clique for clique in found if len(clique) >= minsize]


    def cliques(self, timeout=-1, minsize=1):
        """ returns length-sorted list of cliques

        clique is frozenset

        can change self!  (nodes with less than minsize lines are dropped)

        try to execute bron_kerbosh
        if it raises TimeoutError, executes fast_cliques
        """

        self.drop_if_count(minsize)

        try:
            cliques = self.bron_kerbosh(timeout, minsize)
        except TimeoutError:
            cliques = self.fast_cliques(minsize)
        # sort whichever result we got, largest clique first
        cliques.sort(key=len, reverse=True)
        return cliques

if __name__ == "__main__":
    import doctest
    doctest.testmod()
# File: allpy/pdb.py
""" Functions to get pdb information from fasta id
and to generate fasta id from pdb information

pdb information: code, chain, model

TODO: same for local pdb files
"""

import re
import os
import os.path
import subprocess
from tempfile import NamedTemporaryFile
import urllib2

from Bio.PDB import PDBParser
from Bio.PDB import Superimposer, CaPPBuilder, PDBIO, Select
from Bio.PDB.DSSP import make_dssp_dict

import base
# NOTE(review): `config` was used below (default arguments, pdb_dir,
# pdb_url) without being imported, which fails as soon as this module is
# loaded.  Presumably it is the sibling module -- confirm its location.
import config
from graph import Graph

# NOTE(review): the names `alignment`, `Sequence` and `AminoAcidType` used
# in SequenceMixin are not defined or imported anywhere in this module;
# the methods that reference them raise NameError until that is resolved.


# for pdb-codes: "<code>[sep<chain>[sep<model>]]", e.g. "1abc_A_2".
# The `model` group holds the digits only (the separator used to be part of
# it, so the captured model could never be used as a model number).
re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)([^a-z0-9](?P<model>[0-9]{1,3}))?)?", re.I)


def std_id(pdb_id, pdb_chain, pdb_model=None):
    """Return the standard id: "code_CHAIN" or "code_CHAIN_model".

    The model part is appended only when pdb_model is truthy (so model 0
    is left out).
    """
    if pdb_model:
        return "%s_%s_%s" % \
            (pdb_id.lower().strip(), pdb_chain.upper().strip(), pdb_model)
    else:
        return "%s_%s" % \
            (pdb_id.lower().strip(), pdb_chain.upper().strip())

def pdb_id_parse(ID):
    """Find a pdb code (and optional chain and model) in the string ID.

    Returns None when no pdb code is found, otherwise a dict with keys
    'code', 'chain' (defaults to ' ') and 'model' (int, defaults to 0).
    """
    match = re1.search(ID)
    if not match:
        return None
    d = match.groupdict()
    if not d.get('chain'):
        d['chain'] = ' '
    # Bio.PDB indexes the models of a structure by integer
    d['model'] = int(d['model']) if d.get('model') else 0
    return d


def get_structure(file, name):
    """Parse a pdb file (name or handle) into a Bio.PDB structure."""
    return PDBParser().get_structure(name, file)


class SequenceMixin(base.Sequence):
    """Mixin for adding PDB data to a Sequence.

    Please note: since this is a mixin, no objects of this class should be
    created. This class is intended to be subclassed together with one of
    Sequence classes.

    Attributes:

    *   pdb_chains -- list of Bio.PDB.Chain
    *   pdb_files -- {Bio.PDB.Chain: file object}

    *   pdb_residues -- {Bio.PDB.Chain: {Monomer: Bio.PDB.Residue}}
    *   pdb_secstr -- {Bio.PDB.Chain: {Monomer: 'Secondary structure'}}
            Code   Secondary structure
            H      alpha-helix
            B      Isolated beta-bridge residue
            E      Strand
            G      3-10 helix
            I      pi-helix
            T      Turn
            S      Bend
            -      Other


    ?TODO: global pdb_structures
    """

    def __init__(self, *args, **kw):
        # NOTE(review): the arguments are ignored and the base class
        # __init__ is not called -- confirm this is intended for the mixin.
        self.pdb_chains = []
        self.pdb_files = {}
        self.pdb_residues = {}
        self.pdb_secstr = {}

    def set_pdb_chain(self, pdb_file, pdb_id, pdb_chain, pdb_model=0):
        """ Reads Pdb chain from file

        and align each Monomer with PDB.Residue (TODO)

        pdb_file is kept in self.pdb_files (pdb_add_sec_str needs its
        name), so it is not closed here.
        """
        name = std_id(pdb_id, pdb_chain, pdb_model)
        structure = get_structure(pdb_file, name)
        chain = structure[pdb_model][pdb_chain]
        self.pdb_chains.append(chain)
        self.pdb_residues[chain] = {}
        self.pdb_secstr[chain] = {}
        pdb_sequence = Sequence.from_pdb_chain(chain)
        a = alignment.Alignment.from_sequences(self, pdb_sequence)
        a.muscle_align()
        for monomer, pdb_monomer in a.column(sequence=pdb_sequence, original=self):
            if pdb_sequence.pdb_has(chain, pdb_monomer):
                residue = pdb_sequence.pdb_residues[chain][pdb_monomer]
                self.pdb_residues[chain][monomer] = residue
        self.pdb_files[chain] = pdb_file

    def pdb_unload(self):
        """ Delete all pdb-connected links """
        self.pdb_chains = []
        self.pdb_residues = {}
        self.pdb_secstr = {} # FIXME
        self.pdb_files = {} # FIXME

    @staticmethod
    def from_pdb_chain(chain):
        """ Returns Sequence with Monomers with link to Bio.PDB.Residue

        chain is Bio.PDB.Chain
        """
        cappbuilder = CaPPBuilder()
        peptides = cappbuilder.build_peptides(chain)
        sequence = Sequence()
        sequence.pdb_chains = [chain]
        sequence.pdb_residues[chain] = {}
        sequence.pdb_secstr[chain] = {}
        for peptide in peptides:
            for ca_atom in peptide.get_ca_list():
                residue = ca_atom.get_parent()
                monomer = AminoAcidType.from_pdb_residue(residue).instance()
                sequence.pdb_residues[chain][monomer] = residue
                sequence.monomers.append(monomer)
        return sequence

    def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'):
        """ Adds pdb information to each monomer

        The structure is looked up by self.name: first as a file of that
        name in pdb_directory, then as a pdb code in the local store
        (config.pdb_dir), downloading it from config.pdb_url if absent.

        Returns if information has been successfully added
        TODO: conformity_file

        id-format lava flow
        """
        if conformity_info:
            return False # TODO: conformity_file
        match = pdb_id_parse(self.name)
        if not match:
            return False
        path = os.path.join(pdb_directory, self.name)
        if os.path.exists(path) and os.path.getsize(path):
            self.set_pdb_chain(open(path), match['code'],
                match['chain'], match['model'])
            return True
        code = match['code']
        pdb_filename = config.pdb_dir % code
        if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename):
            url = config.pdb_url % code
            print("Download %s" % url)
            # fetch first, so a failed download leaves no empty file behind
            data = urllib2.urlopen(url).read()
            pdb_file = open(pdb_filename, 'w')
            try:
                pdb_file.write(data)
            finally:
                pdb_file.close()
            print("Save %s" % pdb_filename)
        pdb_file = open(pdb_filename)
        self.set_pdb_chain(pdb_file, code, match['chain'], match['model'])
        return True

    def pdb_save(self, out_filename, pdb_chain):
        """ Saves pdb_chain to out_file """
        class GlySelect(Select):
            """Accept only the requested chain."""
            def accept_chain(self, chain):
                if chain == pdb_chain:
                    return 1
                else:
                    return 0
        io = PDBIO()
        # chain -> model -> structure
        structure = pdb_chain.get_parent().get_parent()
        io.set_structure(structure)
        io.save(out_filename, GlySelect())


    def pdb_add_sec_str(self, pdb_chain):
        """ Add secondary structure data

        Runs the external program `dsspcmbi` on the pdb file of pdb_chain
        and stores the DSSP code of every monomer that has a residue.
        """
        tmp_file = NamedTemporaryFile(delete=False)
        tmp_file.close()
        try:
            pdb_file = self.pdb_files[pdb_chain].name
            # argument list instead of a shell string: safe for any path
            subprocess.call(["dsspcmbi", pdb_file, tmp_file.name])
            dssp, keys = make_dssp_dict(tmp_file.name)
            for monomer in self.monomers:
                if self.pdb_has(pdb_chain, monomer):
                    residue = self.pdb_residues[pdb_chain][monomer]
                    try:
                        d = dssp[(pdb_chain.get_id(), residue.get_id())]
                    except KeyError:
                        print("No dssp information about %s at %s" % (monomer, pdb_chain))
                    else:
                        self.pdb_secstr.setdefault(pdb_chain, {})[monomer] = d[1]
        finally:
            os.unlink(tmp_file.name)

    def pdb_has(self, chain, monomer):
        """Return if monomer has a residue in chain."""
        return chain in self.pdb_residues and monomer in self.pdb_residues[chain]

    def secstr_has(self, chain, monomer):
        """Return if monomer has a secondary structure code in chain."""
        return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]


class AlignmentMixin(base.Alignment):
    """Mixin to add 3D properties to alignments.

    Please note: since this is a mixin, no objects of this class should be
    created. This class is intended to be subclassed together with one of
    Alignment classes.
    """

    def secstr(self, sequence, pdb_chain, gap='-'):
        """ Returns string representing secondary structure

        One character per alignment column of sequence; gap is used where
        no secondary structure is known.
        """
        return ''.join([
            (sequence.pdb_secstr[pdb_chain][m] if sequence.secstr_has(pdb_chain, m) else gap)
            for m in self.body[sequence]])

class BlockMixin(base.Block):
    """Mixin to add 3D properties to blocks.

    Please note: since this is a mixin, no objects of this class should be
    created. This class is intended to be subclassed together with one of
    Block classes.
    """

    def geometrical_cores(self, max_delta=config.delta,
    timeout=config.timeout, minsize=config.minsize,
    ac_new_atoms=config.ac_new_atoms,
    ac_count=config.ac_count):
        """ Returns length-sorted list of blocks, representing GCs

        * max_delta -- threshold of distance spreading
        * timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm)
        * minsize -- min size of each core
        * ac_new_atoms -- min part or new atoms in new alternative core
            current GC is compared with each of already selected GCs if
            difference is less then ac_new_atoms, current GC is skipped
            difference = part of new atoms in current core
        * ac_count -- max number of cores (including main core)
            -1 means infinity

        If more than one pdb chain for some sequence provided, consider all of them
        cost is calculated as 1 / (delta + 1)

            delta in [0, +inf) => cost in (0, 1]

        NOTE(review): the selection of cores from the cliques is
        unfinished -- GCs is never filled, ac_count is not used and the
        method currently returns None.
        """
        nodes = self.positions
        lines = {}
        for i in self.positions:
            for j in self.positions:
                if i < j:
                    distances = []
                    for sequence in self.sequences:
                        for chain in sequence.pdb_chains:
                            m1 = self.alignment.body[sequence][i]
                            m2 = self.alignment.body[sequence][j]
                            if m1 and m2:
                                r1 = sequence.pdb_residues[chain][m1]
                                r2 = sequence.pdb_residues[chain][m2]
                                ca1 = r1['CA']
                                ca2 = r2['CA']
                                d = ca1 - ca2 # Bio.PDB feature
                                distances.append(d)
                    if len(distances) >= 2:
                        delta = max(distances) - min(distances)
                        if delta <= max_delta:
                            # cost depends on this pair's delta (it used
                            # max_delta, i.e. one constant for all lines)
                            lines[Graph.line(i, j)] = 1.0 / (1.0 + delta)
        graph = Graph(nodes, lines)
        cliques = graph.cliques(timeout=timeout, minsize=minsize)
        GCs = []
        for clique in cliques:
            for GC in GCs:
                if len(clique - set(GC.positions)) < ac_new_atoms * len(clique):
                    break

    def ca_atoms(self, sequence, pdb_chain):
        """ Iterates Ca-atom of monomers of this sequence from this block """
        return (sequence.pdb_residues[pdb_chain][monomer]['CA']
            for monomer in self.monomers())

    def sequences_chains(self):
        """ Iterates pairs (sequence, chain) """
        for sequence in self.alignment.sequences:
            if sequence in self.sequences:
                for chain in sequence.pdb_chains:
                    yield (sequence, chain)

    def superimpose(self):
        """ Superimpose all pdb_chains in this block

        The last (sequence, chain) pair is kept fixed, every other chain is
        moved onto it.
        """
        sequences_chains = list(self.sequences_chains())
        if len(sequences_chains) >= 1:
            sup = Superimposer()
            fixed_sequence, fixed_chain = sequences_chains.pop()
            # Superimposer needs real lists (it takes len() of them), and
            # the fixed atoms are reused for every moving chain.
            fixed_atoms = list(self.ca_atoms(fixed_sequence, fixed_chain))
            for sequence, chain in sequences_chains:
                moving_atoms = list(self.ca_atoms(sequence, chain))
                sup.set_atoms(fixed_atoms, moving_atoms)
                # Apply rotation/translation to the moving atoms
                sup.apply(moving_atoms)

    def pdb_save(self, out_file):
        """ Save all sequences

        Returns {(sequence, chain): CHAIN}
        CHAIN is chain letter in new file

        NOTE(review): unfinished -- nothing is written to out_file and
        nothing is returned yet (see TODO below).
        """
        tmp_file = NamedTemporaryFile(delete=False)
        tmp_file.close()
        try:
            for sequence, chain in self.sequences_chains():
                sequence.pdb_save(tmp_file.name, chain)
                # TODO: read from tmp_file.name
                # change CHAIN
                # add to out_file
        finally:
            os.unlink(tmp_file.name)

# vim: set ts=4 sts=4 sw=4 et:
16.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 16.2 +++ b/allpy/protein.py Tue Jan 25 16:03:00 2011 +0300 16.3 @@ -0,0 +1,53 @@ 16.4 +import base 16.5 +import data.codes 16.6 + 16.7 +class Monomer(base.Monomer): 16.8 + """Protein monomers: aminoacids.""" 16.9 + type = 'protein' 16.10 + by_code1 = {} 16.11 + by_code3 = {} 16.12 + by_name = {} 16.13 +Monomer._initialize(data.codes.protein) 16.14 + 16.15 +class Protein(list): 16.16 + """User defined protein; list of protein_sequences.""" 16.17 + pass 16.18 + 16.19 +class Sequence(base.Sequence): 16.20 + monomer_type = Monomer 16.21 + 16.22 +class Alignment(base.Alignment): 16.23 + sequence_type = Sequence 16.24 + 16.25 + def muscle_align(self): 16.26 + """ Simple align ths alignment using sequences (muscle) 16.27 + 16.28 + uses old Monomers and Sequences objects 16.29 + """ 16.30 + tmp_file = NamedTemporaryFile(delete=False) 16.31 + self.save_fasta(tmp_file) 16.32 + tmp_file.close() 16.33 + os.system("muscle -in %(tmp)s -out %(tmp)s" % {'tmp': tmp_file.name}) 16.34 + sequences, body = Alignment.from_file(open(tmp_file.name)) 16.35 + for sequence in self.sequences: 16.36 + try: 16.37 + new_sequence = [i for i in sequences if sequence==i][0] 16.38 + except: 16.39 + raise Exception("Align: Cann't find sequence %s in muscle output" % \ 16.40 + sequence.name) 16.41 + old_monomers = iter(sequence.monomers) 16.42 + self.body[sequence] = [] 16.43 + for monomer in body[new_sequence]: 16.44 + if not monomer: 16.45 + self.body[sequence].append(monomer) 16.46 + else: 16.47 + old_monomer = old_monomers.next() 16.48 + if monomer != old_monomer: 16.49 + raise Exception("Align: alignment errors") 16.50 + self.body[sequence].append(old_monomer) 16.51 + os.unlink(tmp_file.name) 16.52 + 16.53 +class Block(Alignment, base.Block): 16.54 + pass 16.55 + 16.56 +# vim: set ts=4 sts=4 sw=4 et:
17.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 17.2 +++ b/allpy/rna.py Tue Jan 25 16:03:00 2011 +0300 17.3 @@ -0,0 +1,21 @@ 17.4 +import base 17.5 +import data.codes 17.6 + 17.7 +class Monomer(base.Monomer): 17.8 + """RNA monomers: nucleotides.""" 17.9 + type = 'rna' 17.10 + by_code1 = {} 17.11 + by_code3 = {} 17.12 + by_name = {} 17.13 +Monomer._initialize(data.codes.rna) 17.14 + 17.15 +class Sequence(base.Sequence): 17.16 + monomer_type = Monomer 17.17 + 17.18 +class Alignment(base.Alignment): 17.19 + sequence_type = Sequence 17.20 + 17.21 +class Block(Alignment, base.Block): 17.22 + pass 17.23 + 17.24 +# vim: set ts=4 sts=4 sw=4 et:
18.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 18.2 +++ b/allpy/util.py Tue Jan 25 16:03:00 2011 +0300 18.3 @@ -0,0 +1,26 @@ 18.4 +"""Miscellanous utilities. 18.5 +""" 18.6 + 18.7 +def unzip(seq): 18.8 + """The oppozite of zip() builtin.""" 18.9 + a, b = [], [] 18.10 + for x, y in seq: 18.11 + a.append(x) 18.12 + b.append(y) 18.13 + return a, b 18.14 + 18.15 +def remove_each(string, substrings): 18.16 + """Remove each of substrings from string.""" 18.17 + for sub in substrings: 18.18 + string = string.replace(sub, "") 18.19 + return string 18.20 + 18.21 +class UserDict(dict): 18.22 + """Clone of dict that user may add attributes to.""" 18.23 + pass 18.24 + 18.25 +class UserList(list): 18.26 + """Clone of list that user may add attributes to.""" 18.27 + pass 18.28 + 18.29 +# vim: set et ts=4 sts=4 sw=4:
19.1 --- a/blocks3d/AlignmentSeq.py Mon Jan 24 21:40:10 2011 +0300 19.2 +++ b/blocks3d/AlignmentSeq.py Tue Jan 25 16:03:00 2011 +0300 19.3 @@ -16,7 +16,7 @@ 19.4 l=AlignmentSeq(seq_in) 19.5 for t in l.new_sequences: 19.6 print t 19.7 - 19.8 + 19.9 """ 19.10 19.11 19.12 @@ -44,7 +44,7 @@ 19.13 19.14 self.common - ?????? [???????] => [?????? ????, ?????? ??????? ?????] 19.15 """ 19.16 - 19.17 + 19.18 19.19 self.old_sequences = [] 19.20 self.new_sequences = [] 19.21 @@ -56,20 +56,20 @@ 19.22 19.23 self.common=[] 19.24 19.25 - 19.26 + 19.27 for i in xrange(0,len(self.old_sequences)): 19.28 self.unite(i) 19.29 19.30 19.31 for i in xrange(0,len(self.old_sequences)): 19.32 - self.lining(i) 19.33 -## 19.34 + self.lining(i) 19.35 +## 19.36 ## return 19.37 ## 19.38 ## self.correct() 19.39 -## 19.40 ## 19.41 -## 19.42 +## 19.43 +## 19.44 ## 19.45 ## for str1 in self.new_sequences: 19.46 ## self.connections.append(self.recount(str1)) 19.47 @@ -79,7 +79,7 @@ 19.48 19.49 19.50 19.51 - 19.52 + 19.53 def cost(self,a1,a2): 19.54 19.55 """ 19.56 @@ -90,7 +90,7 @@ 19.57 19.58 global matrix 19.59 global gaps 19.60 - 19.61 + 19.62 a1=a1.upper() 19.63 a2=a2.upper() 19.64 19.65 @@ -99,8 +99,6 @@ 19.66 return matrix[a1][a2] 19.67 19.68 return gaps[0] 19.69 - 19.70 - 19.71 19.72 19.73 19.74 @@ -108,7 +106,9 @@ 19.75 19.76 19.77 19.78 - 19.79 + 19.80 + 19.81 + 19.82 def gap_cost(self,int1): 19.83 """ 19.84 ??????? ????????? ????? ???????????? ????? ? 
19.85 @@ -118,13 +118,13 @@ 19.86 19.87 global gaps 19.88 19.89 - 19.90 + 19.91 if (int1 >= len(gaps)): 19.92 return gaps[(len(gaps)-1)] 19.93 else: 19.94 return gaps[int1] 19.95 19.96 - 19.97 + 19.98 19.99 19.100 19.101 @@ -151,7 +151,7 @@ 19.102 str1=self.old_sequences[chainN] 19.103 len1=len(str1) 19.104 19.105 - 19.106 + 19.107 19.108 19.109 if (not self.common): 19.110 @@ -159,10 +159,10 @@ 19.111 while (i<len1): 19.112 aminoacids=[str1[i]] 19.113 chains=[chainN] 19.114 - 19.115 + 19.116 self.common.append([aminoacids,chains]) 19.117 i+=1 19.118 - 19.119 + 19.120 return 19.121 19.122 19.123 @@ -188,7 +188,7 @@ 19.124 tip_from[i].append(0) 19.125 19.126 19.127 -## 19.128 +## 19.129 ## for i in xrange(1,len1+1): 19.130 ## d[i][0] = 0 19.131 ## already_gaps[i][0] = [0,1+i] 19.132 @@ -197,7 +197,7 @@ 19.133 ## for j in xrange(1,len2+1): 19.134 ## d[0][j] = 0 19.135 ## already_gaps[0][j] = [1+j,0] 19.136 -## 19.137 +## 19.138 19.139 19.140 19.141 @@ -212,12 +212,12 @@ 19.142 costs.append(self.cost(str1[i-1],A)) 19.143 cost = max(costs) 19.144 19.145 - 19.146 + 19.147 insertion = d[i-1][j] 19.148 if (j != len2): # ?????????? ??? 19.149 insertion += self.gap_cost(already_gaps[i-1][j][1]) 19.150 19.151 - 19.152 + 19.153 deletion = d[i][j-1] 19.154 if (i != len1): # ?????????? ??? 
19.155 deletion += self.gap_cost(already_gaps[i][j-1][0]) 19.156 @@ -229,32 +229,32 @@ 19.157 d[i][j] = max_way 19.158 19.159 19.160 - 19.161 - 19.162 - 19.163 + 19.164 + 19.165 + 19.166 if (max_way==substitution): 19.167 - tip=3 19.168 + tip=3 19.169 if (max_way==deletion): 19.170 - tip=2 19.171 + tip=2 19.172 if (max_way==insertion): 19.173 tip=1 19.174 - 19.175 + 19.176 19.177 19.178 if (tip==1): # insertion 19.179 already_gaps[i][j]=[0, (already_gaps[i-1][j][1]+1) ] 19.180 19.181 - 19.182 + 19.183 if (tip==2): # deletion 19.184 already_gaps[i][j]=[ (already_gaps[i][j-1][0]+1), 0 ] 19.185 19.186 if (tip==3): # substitution 19.187 already_gaps[i][j]=[ 0, 0 ] 19.188 - 19.189 + 19.190 tip_from[i][j]=tip 19.191 19.192 19.193 -## 19.194 +## 19.195 ## for d1 in d: 19.196 ## for d11 in d1: 19.197 ## print d11, 19.198 @@ -262,24 +262,24 @@ 19.199 ## 19.200 ## 19.201 ## 19.202 -## 19.203 +## 19.204 ## for d1 in tip_from: 19.205 ## for d11 in d1: 19.206 ## print d11, 19.207 ## print 19.208 ## 19.209 -## 19.210 +## 19.211 ## 19.212 ## print insertion 19.213 ## print deletion 19.214 ## print substitution 19.215 ## 19.216 ## 19.217 -## 19.218 +## 19.219 ## print already_gaps 19.220 -## 19.221 +## 19.222 19.223 - 19.224 + 19.225 19.226 i=len1 19.227 j=len2 19.228 @@ -288,43 +288,43 @@ 19.229 19.230 while (i>0 or j>0): 19.231 tip=tip_from[i][j] 19.232 - 19.233 + 19.234 if tip==1 or (j==0 and i>0): 19.235 19.236 aminoacids=[(str1[i-1])] 19.237 chains=[chainN] 19.238 - 19.239 + 19.240 common.append([aminoacids,chains]) 19.241 - 19.242 + 19.243 i-=1 19.244 19.245 19.246 - 19.247 + 19.248 if tip==2 or (i==0 and j>0): 19.249 - 19.250 + 19.251 common.append(self.common[j-1]) 19.252 j-=1 19.253 19.254 - 19.255 + 19.256 if (tip==3): 19.257 - 19.258 + 19.259 chains=self.common[j-1][1] 19.260 chains.append(chainN) 19.261 - 19.262 + 19.263 aminoacids=self.common[j-1][0] 19.264 - 19.265 + 19.266 if (not aminoacids.count(str1[i-1])): 19.267 aminoacids.append(str1[i-1]) 19.268 19.269 
common.append([aminoacids,chains]) 19.270 - 19.271 + 19.272 i-=1 19.273 j-=1 19.274 19.275 - 19.276 - 19.277 + 19.278 + 19.279 common.reverse() 19.280 - 19.281 + 19.282 self.common=common 19.283 19.284 19.285 @@ -356,12 +356,12 @@ 19.286 ????? ??????? ????? ??????????? ?????????????????? 19.287 ? self.new_sequences 19.288 19.289 - chainN - ????? ???? 19.290 + chainN - ????? ???? 19.291 """ 19.292 19.293 str1=self.old_sequences[chainN] 19.294 len1=len(str1) 19.295 - 19.296 + 19.297 len2=len(self.common) 19.298 19.299 19.300 @@ -390,28 +390,28 @@ 19.301 ## def correct(self): 19.302 ## 19.303 ## new_sequences=[] 19.304 -## 19.305 +## 19.306 ## all_l = len(self.new_sequences[0]) # ?????????? ?? ???? ??????????????????? 19.307 ## for str1 in self.new_sequences: 19.308 ## all_l = min (all_l,len(str1)) 19.309 ## 19.310 ## 19.311 -## 19.312 +## 19.313 ## i=0 19.314 ## while (i < all_l): 19.315 -## 19.316 +## 19.317 ## if (i==0): 19.318 ## for str1 in self.new_sequences: 19.319 ## new_sequences.append(str1[0]) 19.320 ## i+=1 19.321 ## continue 19.322 ## 19.323 -## 19.324 +## 19.325 ## all_gaps=1 19.326 ## for str1 in self.new_sequences: 19.327 ## if (str1[i]!='-'): 19.328 ## all_gaps=0 19.329 -## 19.330 +## 19.331 ## if (all_gaps==1): 19.332 ## i+=1 19.333 ## continue 19.334 @@ -424,10 +424,10 @@ 19.335 ## if (str1[i]!='-' and new_sequences[j][-1]!='-'): 19.336 ## sovmest=0 19.337 ## break 19.338 -## 19.339 -## 19.340 ## 19.341 -## 19.342 +## 19.343 +## 19.344 +## 19.345 ## j=-1 19.346 ## for str1 in self.new_sequences: 19.347 ## j+=1 19.348 @@ -437,7 +437,7 @@ 19.349 ## new_sequences[j] = new_sequences[j][:-1] + str1[i] 19.350 ## else: 19.351 ## new_sequences[j] = new_sequences[j] + str1[i] 19.352 -## 19.353 +## 19.354 ## 19.355 ## 19.356 ## i+=1 19.357 @@ -446,7 +446,7 @@ 19.358 ## self.new_sequences = new_sequences 19.359 19.360 19.361 - 19.362 + 19.363 19.364 19.365 19.366 @@ -467,7 +467,7 @@ 19.367 ## 
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" 19.368 ## def recount(self,str1): 19.369 ## spisok={} 19.370 -## 19.371 +## 19.372 ## len1=len(str1) 19.373 ## old_nomer = 0 19.374 ## 19.375 @@ -487,4 +487,4 @@ 19.376 19.377 19.378 19.379 - 19.380 +
20.1 --- a/blocks3d/Blocks3D.py Mon Jan 24 21:40:10 2011 +0300 20.2 +++ b/blocks3d/Blocks3D.py Tue Jan 25 16:03:00 2011 +0300 20.3 @@ -23,7 +23,7 @@ 20.4 # determine html.htm path 20.5 if hasattr(Blocks3D_config, 'html_file'): 20.6 html_file = Blocks3D_config.html_file 20.7 - 20.8 + 20.9 else: 20.10 20.11 if sys.argv[0].replace('/', '') != sys.argv[0]: 20.12 @@ -129,8 +129,8 @@ 20.13 except: 20.14 all_right = 0 20.15 print "Error: bad option" 20.16 - 20.17 - 20.18 + 20.19 + 20.20 20.21 if s: 20.22 save = 0 20.23 @@ -159,7 +159,7 @@ 20.24 20.25 #if not Super_core: 20.26 # Super_core = 0 20.27 - 20.28 + 20.29 #Super_core = int(Super_core) 20.30 20.31 20.32 @@ -175,7 +175,7 @@ 20.33 print "Error: can not find input file" 20.34 except: 20.35 all_right = 0 20.36 - print "Error: can not find input file" 20.37 + print "Error: can not find input file" 20.38 20.39 20.40 if not (output_html_file): 20.41 @@ -209,7 +209,7 @@ 20.42 if not all_right: 20.43 20.44 print ' ' 20.45 - 20.46 + 20.47 print 'Programm for find blocks of true alignment' 20.48 20.49 print ' ' 20.50 @@ -276,14 +276,14 @@ 20.51 #if text.count("\n>") >= 2: 20.52 if text.count("\n>") >= 1: 20.53 # fasta 20.54 - 20.55 + 20.56 # write to temp file input fasta alignment 20.57 open(GC_temp, 'w').write(text) 20.58 - 20.59 + 20.60 else: 20.61 20.62 # not fasta 20.63 - 20.64 + 20.65 # convert input file to fasta 20.66 20.67 try: 20.68 @@ -303,7 +303,7 @@ 20.69 20.70 20.71 20.72 - 20.73 + 20.74 20.75 20.76 20.77 @@ -322,9 +322,9 @@ 20.78 current_id = '' # current fasta identifier 20.79 20.80 for fasta_string in fasta: 20.81 - 20.82 + 20.83 fasta_string = fasta_string.strip() 20.84 - 20.85 + 20.86 if not len(fasta_string): 20.87 continue 20.88 20.89 @@ -345,14 +345,14 @@ 20.90 # read conformity file 20.91 20.92 20.93 -conformity_dict = {} # Key - fasta_id. Value - list [pdb_code, pdb_chain] 20.94 +conformity_dict = {} # Key - fasta_id. 
Value - list [pdb_code, pdb_chain] 20.95 20.96 20.97 if conformity_file: 20.98 conformity = open(conformity_file).readlines() 20.99 20.100 for conformity_string in conformity: 20.101 - 20.102 + 20.103 if len(conformity_string) < 8: 20.104 continue 20.105 20.106 @@ -363,12 +363,12 @@ 20.107 exit() 20.108 20.109 fasta_id = conformity_list[0] 20.110 - 20.111 + 20.112 ## if not fasta_dict.has_key(fasta_id): 20.113 ## print 'Error: unknown fasta identifier "' + fasta_id + '" in conformity file' 20.114 -## exit() 20.115 +## exit() 20.116 20.117 - 20.118 + 20.119 conformity_pdb = conformity_list[1] 20.120 20.121 # if len(conformity_pdb) != 2: 20.122 @@ -379,7 +379,7 @@ 20.123 # print 'Error: can not read PDB code and chain "' + conformity_list[1] + '" in conformity file' 20.124 # exit() 20.125 20.126 - 20.127 + 20.128 conformity_dict[fasta_id] = conformity_pdb 20.129 20.130 20.131 @@ -392,16 +392,16 @@ 20.132 # ID_user[ID] = ID, entered by user 20.133 ID_user = {} 20.134 20.135 - 20.136 + 20.137 for fasta_id, sequence in fasta_dict.items(): 20.138 - 20.139 + 20.140 if conformity_dict.has_key(fasta_id): 20.141 fasta_id = conformity_dict[fasta_id] 20.142 - 20.143 - 20.144 + 20.145 + 20.146 pdb = B3D.from_ID(fasta_id) 20.147 - 20.148 - 20.149 + 20.150 + 20.151 20.152 if not pdb: 20.153 print 'Error: sequence identifier "' + fasta_id + '" in not valid' 20.154 @@ -411,7 +411,7 @@ 20.155 pdb_code, pdb_chain, model = pdb 20.156 20.157 block.append([pdb_code, pdb_chain, sequence, model]) 20.158 - 20.159 + 20.160 # save ID, entered by user 20.161 ID = B3D.to_ID(pdb_code, pdb_chain, model) 20.162 ID_user[ID] = fasta_id 20.163 @@ -431,8 +431,8 @@ 20.164 # run GeometricalCore_class to make core 20.165 ############################################## 20.166 20.167 - 20.168 - 20.169 + 20.170 + 20.171 B3D.min_width = min_width 20.172 B3D.timeout_2 = timeout_2 20.173 20.174 @@ -444,7 +444,7 @@ 20.175 B3D.timeout = timeout 20.176 B3D.pdb_url = pdb_url 20.177 20.178 - 20.179 + 20.180 # 
blocks is object with result 20.181 blocks = B3D.find_blocks(block) 20.182 20.183 @@ -455,13 +455,13 @@ 20.184 for block in blocks: 20.185 20.186 IDs_1 = [] 20.187 - 20.188 + 20.189 for ID in block['IDs']: 20.190 - 20.191 + 20.192 IDs_1.append(ID_user[ID]) 20.193 - 20.194 + 20.195 block['IDs'] = IDs_1 20.196 - 20.197 + 20.198 blocks_1.append(block) 20.199 20.200 20.201 @@ -470,25 +470,25 @@ 20.202 20.203 if output_html_file: 20.204 # produce HTML 20.205 - 20.206 + 20.207 t = '' 20.208 - 20.209 - 20.210 + 20.211 + 20.212 t += "blocks = json('" + json.dumps(blocks_1) + "');" 20.213 t += "fasta_dict = json('" + json.dumps(fasta_dict) + "');" 20.214 - 20.215 + 20.216 # t += "try{blocks_init();}catch(e){}" 20.217 - 20.218 - 20.219 - 20.220 + 20.221 + 20.222 + 20.223 html = ''.join(open(html_file).readlines()) 20.224 - 20.225 + 20.226 # insert out code instead of "self_js_text" 20.227 html = html.replace('self_js_text', t) 20.228 - 20.229 - 20.230 + 20.231 + 20.232 open(output_html_file, 'w').write(html) 20.233 - 20.234 + 20.235 20.236 20.237
21.1 --- a/blocks3d/Blocks3D_class.py Mon Jan 24 21:40:10 2011 +0300 21.2 +++ b/blocks3d/Blocks3D_class.py Tue Jan 25 16:03:00 2011 +0300 21.3 @@ -36,25 +36,25 @@ 21.4 21.5 User parameters: 21.6 21.7 - 21.8 + 21.9 """ 21.10 21.11 21.12 21.13 21.14 - 21.15 + 21.16 def __init__(self): 21.17 21.18 """ 21.19 Create new copy of class 21.20 """ 21.21 - 21.22 + 21.23 GC.__init__(self) 21.24 - 21.25 + 21.26 vars(self).update(vars(Blocks3D_config)) 21.27 - 21.28 - 21.29 - 21.30 + 21.31 + 21.32 + 21.33 21.34 21.35 21.36 @@ -67,7 +67,7 @@ 21.37 It's element - list like ["1jcc", "A", "SSNAKIDQLSSDAQ", None] 21.38 SSNAKIDQLSSDAQ - part of alignment block sequence 21.39 Last parameter - model or None 21.40 - 21.41 + 21.42 21.43 Result: 21.44 returns list of blocks 21.45 @@ -77,7 +77,7 @@ 21.46 end: integer 21.47 IDs: list of IDs 21.48 """ 21.49 - 21.50 + 21.51 # # list of IDs 21.52 IDs = [] 21.53 21.54 @@ -85,109 +85,109 @@ 21.55 21.56 ID = self.to_ID(pdb_code, pdb_chain, model) 21.57 IDs.append(ID) 21.58 - 21.59 + 21.60 self.IDs = IDs 21.61 - 21.62 + 21.63 21.64 # build couple cores: 21.65 - 21.66 + 21.67 # CORES = <list of cores> 21.68 # core is list of positions 21.69 # CORES = [] 21.70 - 21.71 - 21.72 - 21.73 + 21.74 + 21.75 + 21.76 # couple_core_parts[ID_1][ID_2] = <list of parts> 21.77 # part = {'core': core_number, 'start': start_posotion, 'end': end_position} 21.78 couple_core_parts = {} 21.79 - 21.80 - 21.81 - 21.82 + 21.83 + 21.84 + 21.85 21.86 for protein_1 in alignment: 21.87 pdb_code_1, pdb_chain_1, seq_part_1, model_1 = protein_1 21.88 ID_1 = self.to_ID(pdb_code_1, pdb_chain_1, model_1) 21.89 21.90 couple_core_parts[ID_1] = {} 21.91 - 21.92 + 21.93 for protein_2 in alignment: 21.94 pdb_code_2, pdb_chain_2, seq_part_2, model_2 = protein_2 21.95 ID_2 = self.to_ID(pdb_code_2, pdb_chain_2, model_2) 21.96 - 21.97 + 21.98 if ID_1 == ID_2: 21.99 break 21.100 - 21.101 - 21.102 + 21.103 + 21.104 cores = self.check_block([protein_1, protein_2]) 21.105 - 21.106 + 21.107 # 
print ' ' 21.108 - 21.109 + 21.110 # print cores 21.111 - 21.112 - 21.113 + 21.114 + 21.115 couple_core_parts[ID_1][ID_2] = [] 21.116 - 21.117 + 21.118 all_cores = [] # list of positions of all cores 21.119 - 21.120 + 21.121 for core in cores: 21.122 - 21.123 + 21.124 parts = self.splice_to_continued(core, (ID_1, ID_2)) 21.125 - 21.126 + 21.127 for part in parts: 21.128 - 21.129 + 21.130 if len(part) >= self.min_width: 21.131 - 21.132 + 21.133 all_cores.extend(clon(part)) 21.134 - 21.135 + 21.136 all_cores = list(set(all_cores)) # unical elements 21.137 - 21.138 + 21.139 all_cores.sort() 21.140 - 21.141 - 21.142 + 21.143 + 21.144 parts = self.splice_to_continued(all_cores, (ID_1, ID_2)) 21.145 - 21.146 + 21.147 for part in parts: 21.148 - 21.149 + 21.150 if len(part) < self.min_width: 21.151 continue 21.152 - 21.153 + 21.154 couple_core_parts[ID_1][ID_2].append(clon({ 21.155 'start': part[0], 'end': part[-1]})) 21.156 21.157 couple_core_parts[ID_2][ID_1] = couple_core_parts[ID_1][ID_2] 21.158 - 21.159 + 21.160 # CORES.append(clon(core)) 21.161 - 21.162 - 21.163 - 21.164 + 21.165 + 21.166 + 21.167 # exit() 21.168 - 21.169 + 21.170 # list of elements of blocks graph 21.171 # element is (ID, pos) 21.172 graph_elements = [] 21.173 - 21.174 + 21.175 # reverse dict to graph_elements 21.176 graph_elements_1 = {} 21.177 - 21.178 - 21.179 + 21.180 + 21.181 # graph[i][j] == 1, if these elements are connected, or i == j 21.182 # i and j are indexes from list "graph_elements" 21.183 graph = {} 21.184 - 21.185 - 21.186 + 21.187 + 21.188 # same as graph, but include lists of cores n 21.189 # graph_cores = {} 21.190 - 21.191 - 21.192 - 21.193 + 21.194 + 21.195 + 21.196 # is_boundary[ID][pos] == 1, if pos is boundary of some part in ID 21.197 is_boundary = {} 21.198 - 21.199 - 21.200 - 21.201 + 21.202 + 21.203 + 21.204 for ID_1 in IDs: 21.205 - 21.206 - 21.207 + 21.208 + 21.209 # find boundaries 21.210 is_boundary[ID_1] = {} 21.211 for ID_2 in IDs: 21.212 @@ -196,70 +196,70 @@ 
21.213 for part in couple_core_parts[ID_1][ID_2]: 21.214 is_boundary[ID_1][part['start']] = 1 21.215 is_boundary[ID_1][part['end']] = 1 21.216 - 21.217 21.218 - 21.219 - 21.220 + 21.221 + 21.222 + 21.223 for ID_2 in IDs: 21.224 - 21.225 + 21.226 if ID_1 == ID_2: 21.227 break 21.228 - 21.229 - 21.230 - 21.231 - 21.232 - 21.233 + 21.234 + 21.235 + 21.236 + 21.237 + 21.238 for part in couple_core_parts[ID_1][ID_2]: 21.239 - 21.240 + 21.241 start = part['start'] 21.242 end = part['end'] 21.243 - 21.244 + 21.245 # elements of graph like (ID, pos) 21.246 elements = [] 21.247 - 21.248 + 21.249 for pos in xrange(start, end + 1): 21.250 - 21.251 + 21.252 if not is_boundary[ID_1].has_key(pos) and not is_boundary[ID_2].has_key(pos): 21.253 continue 21.254 - 21.255 + 21.256 for ID in (ID_1, ID_2): 21.257 if not graph_elements_1.has_key((ID, pos)): 21.258 L = len(graph_elements) 21.259 graph_elements_1[(ID, pos)] = L 21.260 graph[L] = {} 21.261 # graph_cores[L] = {} 21.262 - 21.263 + 21.264 graph_elements.append((ID, pos)) 21.265 - 21.266 + 21.267 21.268 elements.append(graph_elements_1[(ID, pos)]) 21.269 - 21.270 - 21.271 + 21.272 + 21.273 for element_1 in elements: 21.274 for element_2 in elements: 21.275 graph[element_1][element_2] = 1 21.276 - 21.277 + 21.278 # # add information about cores 21.279 # if not graph_cores[element_1].has_key(element_2): 21.280 # graph_cores[element_1][element_2] = [] 21.281 # graph_cores[element_1][element_2].append(part['core']) 21.282 - 21.283 - 21.284 - 21.285 + 21.286 + 21.287 + 21.288 L = len(graph_elements) 21.289 - 21.290 + 21.291 for element_1 in xrange(0, L): 21.292 for element_2 in xrange(0, L): 21.293 if not graph[element_1].has_key(element_2): 21.294 graph[element_1][element_2] = 0 21.295 - 21.296 - 21.297 - 21.298 + 21.299 + 21.300 + 21.301 kliki_1 = Kliki(graph, min_size=0, timeout=self.timeout_2).kliki 21.302 - 21.303 - 21.304 -# 21.305 -# 21.306 + 21.307 + 21.308 +# 21.309 +# 21.310 # # graf illustration 21.311 # for i in 
graph.keys(): 21.312 # t = '' 21.313 @@ -269,441 +269,441 @@ 21.314 # else: 21.315 # t += ' ' 21.316 # print t 21.317 -# 21.318 -# 21.319 - 21.320 - 21.321 +# 21.322 +# 21.323 + 21.324 + 21.325 alignment_len = len(alignment[0][2]) 21.326 - 21.327 - 21.328 - 21.329 + 21.330 + 21.331 + 21.332 # occupied alignment elements 21.333 occupied = {} 21.334 - 21.335 + 21.336 for ID in IDs: 21.337 - 21.338 + 21.339 occupied[ID] = [0] * alignment_len 21.340 - 21.341 + 21.342 # 00000000 21.343 occupied_current0 = clon(occupied) 21.344 - 21.345 - 21.346 - 21.347 - 21.348 - 21.349 - 21.350 - 21.351 + 21.352 + 21.353 + 21.354 + 21.355 + 21.356 + 21.357 + 21.358 # print graph 21.359 - 21.360 + 21.361 # print kliki_1 21.362 - 21.363 - 21.364 + 21.365 + 21.366 rectangles = [] 21.367 - 21.368 + 21.369 for klika in kliki_1: 21.370 - 21.371 + 21.372 if self.rectangles_type == 'out': 21.373 - 21.374 - # transitivity 21.375 - 21.376 + 21.377 + # transitivity 21.378 + 21.379 IDs_dict = {} 21.380 start = alignment_len 21.381 end = 0 21.382 - 21.383 + 21.384 for element in klika: 21.385 - 21.386 + 21.387 ID, pos = graph_elements[element] 21.388 - 21.389 + 21.390 IDs_dict[ID] = 1 21.391 - 21.392 + 21.393 start = min(start, pos) 21.394 end = max(end, pos) 21.395 - 21.396 + 21.397 IDs = IDs_dict.keys() 21.398 - 21.399 - 21.400 + 21.401 + 21.402 rectangles.append({'IDs': IDs, 'start': start, 'end': end}) 21.403 - 21.404 + 21.405 else: 21.406 - 21.407 + 21.408 ID_start = {} 21.409 ID_end = {} 21.410 - 21.411 + 21.412 for element in klika: 21.413 - 21.414 + 21.415 ID, pos = graph_elements[element] 21.416 - 21.417 - 21.418 + 21.419 + 21.420 if not ID_start.has_key(ID): 21.421 ID_start[ID] = pos 21.422 ID_end[ID] = pos 21.423 else: 21.424 ID_start[ID] = min(ID_start[ID], pos) 21.425 ID_end[ID] = max(ID_end[ID], pos) 21.426 - 21.427 - 21.428 + 21.429 + 21.430 # empty table 21.431 occupied_current = clon(occupied_current0) 21.432 - 21.433 - 21.434 - 21.435 + 21.436 + 21.437 + 21.438 for ID, start in 
ID_start.items(): 21.439 end = ID_end[ID] 21.440 - 21.441 + 21.442 for pos in xrange(start, end + 1): 21.443 occupied_current[ID][pos] = 1 21.444 - 21.445 - 21.446 + 21.447 + 21.448 rectangles.extend(self.splice_to_rect(occupied_current)) 21.449 - 21.450 - 21.451 - 21.452 + 21.453 + 21.454 + 21.455 # sort by height and then by width 21.456 - 21.457 + 21.458 rectangles.sort(self.rectangles_compare) 21.459 - 21.460 - 21.461 - 21.462 - 21.463 - 21.464 - 21.465 - 21.466 - 21.467 - 21.468 + 21.469 + 21.470 + 21.471 + 21.472 + 21.473 + 21.474 + 21.475 + 21.476 + 21.477 rectangles_2 = [] 21.478 - 21.479 - 21.480 + 21.481 + 21.482 for rectangle in rectangles: 21.483 - 21.484 + 21.485 # empty table 21.486 occupied_current = clon(occupied_current0) 21.487 - 21.488 + 21.489 for ID in rectangle['IDs']: 21.490 - 21.491 + 21.492 for pos in xrange(rectangle['start'], rectangle['end'] + 1): 21.493 - 21.494 + 21.495 if not occupied[ID][pos]: 21.496 - 21.497 + 21.498 occupied_current[ID][pos] = 1 21.499 - 21.500 + 21.501 rectangles_current = self.splice_to_rect(occupied_current) 21.502 - 21.503 - 21.504 - 21.505 + 21.506 + 21.507 + 21.508 rectangles_2.extend(rectangles_current) 21.509 - 21.510 + 21.511 for rectangle in rectangles_current: 21.512 - 21.513 + 21.514 # if len(rectangle['IDs']) ==2 and rectangle['IDs'][1] == '1bw5_A_4': 21.515 # print occupied_current, rectangles 21.516 - 21.517 - 21.518 + 21.519 + 21.520 for ID in rectangle['IDs']: 21.521 for pos in xrange(rectangle['start'], rectangle['end'] + 1): 21.522 occupied[ID][pos] = 1 21.523 - 21.524 - 21.525 - 21.526 + 21.527 + 21.528 + 21.529 rectangles = clon(rectangles_2) 21.530 - 21.531 - 21.532 - 21.533 - 21.534 - 21.535 + 21.536 + 21.537 + 21.538 + 21.539 + 21.540 # delete blocks, including 1 sequence 21.541 - 21.542 + 21.543 rectangles = filter(self.rectangle_filter, rectangles) 21.544 - 21.545 - 21.546 + 21.547 + 21.548 return rectangles 21.549 - 21.550 - 21.551 - 21.552 -# 21.553 -# # transitivity 21.554 + 21.555 + 
21.556 + 21.557 +# 21.558 +# # transitivity 21.559 # def splice_to_outrect(self, occupied_current, occupied): 21.560 # """ 21.561 # Splice multitude of alignment elements to rectangles 21.562 -# 21.563 +# 21.564 # Returns: 21.565 # new occupied_current (out rectangle) \ occupied 21.566 -# 21.567 +# 21.568 # """ 21.569 -# 21.570 +# 21.571 # width = len(occupied_current.values()[0]) 21.572 -# 21.573 +# 21.574 # start = width # the most right position 21.575 # end = 0 # the most left position 21.576 -# 21.577 +# 21.578 # IDs_dict = {} 21.579 -# 21.580 +# 21.581 # for ID, line in occupied_current.items(): 21.582 -# 21.583 +# 21.584 # for pos, condition in enumerate(line): 21.585 -# 21.586 +# 21.587 # if condition: 21.588 -# 21.589 +# 21.590 # IDs_dict[ID] = 1 21.591 # start = min(start, pos) 21.592 # end = max(end, pos) 21.593 -# 21.594 +# 21.595 # IDs = IDs_dict.keys() 21.596 -# 21.597 -# 21.598 -# 21.599 +# 21.600 +# 21.601 +# 21.602 # occupied_current_1 = {} 21.603 -# 21.604 -# 21.605 -# 21.606 +# 21.607 +# 21.608 +# 21.609 # for ID in IDs: 21.610 -# 21.611 +# 21.612 # occupied_current_1[ID] = [0] * width 21.613 -# 21.614 +# 21.615 # for pos in xrange(start, end + 1): 21.616 # 21.617 # if not occupied[ID][pos]: 21.618 # 21.619 # occupied_current_1[ID][pos] = 1 21.620 -# 21.621 +# 21.622 # return occupied_current_1 21.623 -# 21.624 -# 21.625 - 21.626 - 21.627 +# 21.628 +# 21.629 + 21.630 + 21.631 def rectangles_compare(self, x, y): 21.632 - 21.633 + 21.634 height_x = len(x['IDs']) 21.635 height_y = len(y['IDs']) 21.636 - 21.637 + 21.638 if height_x > height_y: 21.639 return -1 21.640 if height_x < height_y: 21.641 return 1 21.642 - 21.643 + 21.644 # same heights 21.645 - 21.646 + 21.647 width_x = x['end'] - x['start'] + 1 21.648 width_y = y['end'] - y['start'] + 1 21.649 - 21.650 + 21.651 if width_x > width_y: 21.652 return -1 21.653 if width_x < width_y: 21.654 return 1 21.655 - 21.656 + 21.657 return 0 21.658 - 21.659 - 21.660 - 21.661 - 21.662 + 21.663 + 21.664 + 
21.665 + 21.666 def rectangle_filter(self, rectangle): 21.667 - 21.668 + 21.669 height = len(rectangle['IDs']) 21.670 - 21.671 + 21.672 width = rectangle['end'] - rectangle['start'] + 1 21.673 - 21.674 + 21.675 if height >= 2 and width >= self.min_width: 21.676 return True 21.677 else: 21.678 return False 21.679 - 21.680 - 21.681 - 21.682 - 21.683 + 21.684 + 21.685 + 21.686 + 21.687 def splice_to_rect(self, occupied_current): 21.688 """ 21.689 Splice multitude of alignment elements to rectangles 21.690 - 21.691 + 21.692 Returns list of rectangles: 21.693 rectangle is dict: 21.694 'start' 21.695 'end' 21.696 'IDs' - list 21.697 - 21.698 + 21.699 """ 21.700 - 21.701 + 21.702 # occupied_current_1 = splice_to_outrect(occupied_current, occupied) 21.703 21.704 occupied_current_1 = clon(occupied_current) 21.705 - 21.706 + 21.707 width = len(occupied_current_1.values()[0]) 21.708 - 21.709 - 21.710 + 21.711 + 21.712 rectangles = [] 21.713 - 21.714 + 21.715 rectangles_count = 0 21.716 - 21.717 - 21.718 - while 1: 21.719 - 21.720 + 21.721 + 21.722 + while 1: 21.723 + 21.724 excluded_IDs = {} 21.725 - 21.726 + 21.727 while 1: 21.728 - 21.729 + 21.730 IDs_occupied = {} # is ID occupied 21.731 - 21.732 + 21.733 height = {} # this position height 21.734 - 21.735 + 21.736 for pos in xrange(0, width): 21.737 - 21.738 + 21.739 height[pos] = 0 21.740 - 21.741 + 21.742 for ID, e in occupied_current_1.items(): 21.743 - 21.744 + 21.745 if excluded_IDs.has_key(ID): 21.746 continue 21.747 - 21.748 + 21.749 if e[pos]: 21.750 height[pos] += 1 21.751 - 21.752 + 21.753 IDs_occupied[ID] = 1 21.754 - 21.755 - 21.756 + 21.757 + 21.758 max_height = len(IDs_occupied.keys()) 21.759 - 21.760 - 21.761 + 21.762 + 21.763 if max_height <= 1: 21.764 break 21.765 - 21.766 - 21.767 - 21.768 + 21.769 + 21.770 + 21.771 max_height_positions = [] 21.772 - 21.773 + 21.774 for pos in xrange(0, width): 21.775 if height[pos] == max_height: 21.776 max_height_positions.append(pos) 21.777 - 21.778 - 21.779 + 21.780 + 
21.781 parts = self.splice_to_continued(max_height_positions, IDs_occupied.keys()) 21.782 21.783 - 21.784 + 21.785 ok = 0 21.786 - 21.787 + 21.788 for part in parts: 21.789 - 21.790 + 21.791 if len(part) >= self.min_width: 21.792 - 21.793 + 21.794 # It is block! 21.795 - 21.796 + 21.797 IDs = IDs_occupied.keys() 21.798 start = part[0] 21.799 end = part[-1] 21.800 - 21.801 - 21.802 + 21.803 + 21.804 rectangles.append(clon({'IDs': IDs, 'start': start, 'end': end, 'cores':[]})) 21.805 - 21.806 - 21.807 + 21.808 + 21.809 # exclude elements 21.810 - 21.811 + 21.812 for ID in IDs: 21.813 for pos in xrange(start, end + 1): 21.814 occupied_current_1[ID][pos] = 0 21.815 - 21.816 - 21.817 - 21.818 + 21.819 + 21.820 + 21.821 ok = 1 21.822 - 21.823 + 21.824 if ok: 21.825 continue 21.826 - 21.827 - 21.828 - 21.829 - 21.830 + 21.831 + 21.832 + 21.833 + 21.834 # find the shortest sequence 21.835 - 21.836 - 21.837 + 21.838 + 21.839 max_width_dict = {} 21.840 - 21.841 + 21.842 for ID, e in occupied_current_1.items(): 21.843 - 21.844 + 21.845 if excluded_IDs.has_key(ID): 21.846 continue 21.847 - 21.848 + 21.849 positions = [] 21.850 - 21.851 + 21.852 for pos in xrange(0, width): 21.853 - 21.854 + 21.855 if height[pos] <= 1: 21.856 # "bad" position 21.857 continue 21.858 - 21.859 + 21.860 if e[pos]: 21.861 positions.append(pos) 21.862 - 21.863 + 21.864 if len(positions) == 0: 21.865 continue 21.866 - 21.867 + 21.868 parts = self.splice_to_continued(positions, [ID]) 21.869 - 21.870 + 21.871 max_part_len = 0 21.872 - 21.873 + 21.874 for part in parts: 21.875 max_part_len = max(len(part), max_part_len) 21.876 - 21.877 + 21.878 max_width_dict[ID] = max_part_len 21.879 - 21.880 - 21.881 + 21.882 + 21.883 if len(max_width_dict.values()): 21.884 min_width = min(max_width_dict.values()) 21.885 else: 21.886 break 21.887 - 21.888 + 21.889 # exclude these sequences 21.890 - 21.891 + 21.892 for ID, e in occupied_current_1.items(): 21.893 - 21.894 + 21.895 if max_width_dict.has_key(ID): 21.896 
#if max_width_dict[ID] == min_width: 21.897 if max_width_dict[ID] <= min_width: 21.898 - 21.899 + 21.900 excluded_IDs[ID] = 1 21.901 - 21.902 + 21.903 # for pos in xrange(0, width): 21.904 # occupied_current_1[ID][pos] = 0 21.905 21.906 - 21.907 + 21.908 # print rectangles_count 21.909 - 21.910 + 21.911 if len(rectangles) == rectangles_count: 21.912 break 21.913 - 21.914 + 21.915 rectangles_count = len(rectangles) 21.916 - 21.917 + 21.918 # print rectangles_count 21.919 - 21.920 - 21.921 + 21.922 + 21.923 return rectangles 21.924 - 21.925 - 21.926 - 21.927 - 21.928 - 21.929 - 21.930 - 21.931 - 21.932 - 21.933 - 21.934 - 21.935 - 21.936 - 21.937 - 21.938 - 21.939 - 21.940 - 21.941 - 21.942 - 21.943 - 21.944 - 21.945 - 21.946 - 21.947 - 21.948 - 21.949 + 21.950 + 21.951 + 21.952 + 21.953 + 21.954 + 21.955 + 21.956 + 21.957 + 21.958 + 21.959 + 21.960 + 21.961 + 21.962 + 21.963 + 21.964 + 21.965 + 21.966 + 21.967 + 21.968 + 21.969 + 21.970 + 21.971 + 21.972 + 21.973 + 21.974 def splice_to_continued(self, LIST, IDs): 21.975 """ 21.976 LIST is a list of positions of alignment 21.977 - 21.978 + 21.979 returns list of continued lists, constituted source LIST 21.980 """ 21.981 - 21.982 + 21.983 LIST.sort() 21.984 21.985 21.986 @@ -713,38 +713,38 @@ 21.987 for pos in LIST: 21.988 21.989 if len(parts[-1]): 21.990 - 21.991 + 21.992 if pos - parts[-1][-1] == 1: 21.993 - 21.994 + 21.995 parts[-1].append(pos) 21.996 - 21.997 + 21.998 else: 21.999 - 21.1000 + 21.1001 # check omited sequence 21.1002 - 21.1003 + 21.1004 gappes = 1 # all omited sequence is gaps 21.1005 - 21.1006 + 21.1007 for x in xrange(parts[-1][-1] + 1, pos): 21.1008 - 21.1009 + 21.1010 for ID in IDs: 21.1011 if self.structure1[ID][x]: 21.1012 gappes = 0 21.1013 break 21.1014 - 21.1015 + 21.1016 if gappes == 0: 21.1017 break 21.1018 - 21.1019 + 21.1020 if not gappes: 21.1021 - 21.1022 + 21.1023 # There are not only gapes 21.1024 parts.append([pos]) 21.1025 - 21.1026 - else: 21.1027 + 21.1028 + else: 21.1029 # 
first element 21.1030 - 21.1031 + 21.1032 parts[-1].append(pos) 21.1033 - 21.1034 - 21.1035 + 21.1036 + 21.1037 return clon(parts) 21.1038 21.1039
22.1 --- a/blocks3d/GeometricalCore_class.py Mon Jan 24 21:40:10 2011 +0300 22.2 +++ b/blocks3d/GeometricalCore_class.py Tue Jan 25 16:03:00 2011 +0300 22.3 @@ -45,14 +45,14 @@ 22.4 22.5 22.6 22.7 -import os # to control file existence 22.8 +import os # to control file existence 22.9 22.10 import urllib2 # to upload PDB from Internet 22.11 22.12 import re # regulas expression 22.13 22.14 22.15 -from Kliki import Kliki # algorithm to find core(s) from graph 22.16 +from Kliki import Kliki # algorithm to find core(s) from graph 22.17 22.18 import AAdict # dict to convert 3-N code to 1-N 22.19 import AlignmentSeq # sequense alignment class 22.20 @@ -73,14 +73,14 @@ 22.21 22.22 self.structure 22.23 3d Coordinates of CA atoms of each structure 22.24 - 22.25 + 22.26 Formation: 22.27 self.structure[structure name][atom identifier] = list [x,y,z] 22.28 where: 22.29 structure name - string like "1jcc:A" 22.30 atom identifier - number of residue i PDB file, first is 0 22.31 x,y,x - Coordinates of CA atoms in PDB 22.32 - 22.33 + 22.34 22.35 self.seq 22.36 sequences of each structures 22.37 @@ -88,11 +88,11 @@ 22.38 Formation: 22.39 self.seq[structure name] = string like "SSNAKIDQLSSDAQ" 22.40 where: structure name - string like "1jcc:A" 22.41 - 22.42 + 22.43 self.structure1 - same as self.structure, but atom identifier is number in block 22.44 - 22.45 + 22.46 self.d - distances 22.47 - self.d[ID][i][j] = distance between i and j atoms in structure ID 22.48 + self.d[ID][i][j] = distance between i and j atoms in structure ID 22.49 22.50 22.51 22.52 @@ -109,13 +109,13 @@ 22.53 self.alternative_core_new_atoms - part of new atoms in alternative core 22.54 22.55 self.alternative_cores_count - max number of cores (including main core) 22.56 - 22.57 + 22.58 """ 22.59 22.60 22.61 22.62 22.63 - 22.64 + 22.65 def __init__(self): 22.66 22.67 """ 22.68 @@ -123,21 +123,21 @@ 22.69 22.70 Creates self vars self.structure and self.seq 22.71 """ 22.72 - 22.73 + 22.74 
vars(self).update(vars(GeometricalCore_config)) 22.75 22.76 22.77 self.structure = {} 22.78 self.structure1 = {} 22.79 self.d = {} 22.80 - 22.81 - 22.82 + 22.83 + 22.84 self.seq = {} 22.85 self.pdb_text = {} # there will be PDB texts in this dict. key - string like "1jcc" 22.86 - 22.87 - 22.88 - 22.89 - 22.90 + 22.91 + 22.92 + 22.93 + 22.94 22.95 22.96 22.97 @@ -148,7 +148,7 @@ 22.98 If you use at first one boundaries (in positions) of block in some PDB file, 22.99 and then decide to change them, run this method before running check_block method 22.100 """ 22.101 - 22.102 + 22.103 self.structure1 = {} 22.104 self.d = {} 22.105 22.106 @@ -180,108 +180,108 @@ 22.107 # 22.108 # 22.109 # if alternative_cores_count != None: 22.110 -# self.alternative_cores_count = alternative_cores_count 22.111 +# self.alternative_cores_count = alternative_cores_count 22.112 # 22.113 # if timeout != None: 22.114 -# self.timeout = timeout 22.115 +# self.timeout = timeout 22.116 # 22.117 # 22.118 # 22.119 22.120 22.121 22.122 - 22.123 + 22.124 def to_ID(self, pdb_code, pdb_chain, model): 22.125 - 22.126 + 22.127 """ 22.128 Makes standart ID for structure from pdb_code, chain and model 22.129 """ 22.130 - 22.131 + 22.132 ID = pdb_code + '_' 22.133 - 22.134 + 22.135 if pdb_chain: 22.136 ID = ID + pdb_chain.upper().strip() 22.137 22.138 if model: 22.139 ID = ID + '_' + model 22.140 - 22.141 + 22.142 return ID 22.143 - 22.144 - 22.145 - 22.146 - 22.147 - 22.148 - 22.149 + 22.150 + 22.151 + 22.152 + 22.153 + 22.154 + 22.155 def from_ID(self, ID): 22.156 - 22.157 + 22.158 """ 22.159 Parse standart ID to pdb_code, chain and model 22.160 """ 22.161 - 22.162 - 22.163 + 22.164 + 22.165 if not hasattr(self, 're1'): 22.166 - 22.167 - 22.168 + 22.169 + 22.170 # makes compiled regular expressions 22.171 - 22.172 + 22.173 # for pdb-codes 22.174 self.re1 = re.compile(r"(^|[^a-z0-9])([0-9][0-9a-z]{3})([^a-z0-9]([0-9a-z ]?)([^a-z0-9]([0-9]{1,3}))?)?($|[^a-z0-9])") 22.175 - 22.176 + 22.177 # for files 
22.178 self.re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$") 22.179 - 22.180 - 22.181 + 22.182 + 22.183 self.from_ID_dict = {} 22.184 - 22.185 - 22.186 - 22.187 - 22.188 + 22.189 + 22.190 + 22.191 + 22.192 if self.from_ID_dict.has_key(ID): 22.193 return self.from_ID_dict[ID] 22.194 - 22.195 - 22.196 + 22.197 + 22.198 if ID.lower().find('.ent') != -1 or ID.lower().find('.pdb') != -1: 22.199 # it is file 22.200 parseO = self.re2.search(ID) # files 22.201 else: 22.202 parseO = self.re1.search(ID.lower()) # pdb codes 22.203 - 22.204 - 22.205 + 22.206 + 22.207 if not parseO: 22.208 return None 22.209 - 22.210 + 22.211 parse = parseO.groups() 22.212 - 22.213 - 22.214 + 22.215 + 22.216 if len(parse) < 2: 22.217 return None 22.218 - 22.219 - 22.220 + 22.221 + 22.222 code = parse[1] 22.223 - 22.224 + 22.225 # if not file: 22.226 # code = code.lower() 22.227 - 22.228 + 22.229 chain = '' 22.230 model = None 22.231 - 22.232 + 22.233 if len(parse) >= 4: 22.234 chain = parse[3] 22.235 - 22.236 + 22.237 if chain: 22.238 chain = chain.upper() 22.239 - 22.240 + 22.241 if len(parse) >= 6: 22.242 if parse[5]: 22.243 model = parse[5] 22.244 - 22.245 - 22.246 - 22.247 + 22.248 + 22.249 + 22.250 self.from_ID_dict[ID] = (code, chain, model) # save parsing results 22.251 - 22.252 + 22.253 return code, chain, model 22.254 - 22.255 - 22.256 + 22.257 + 22.258 22.259 22.260 22.261 @@ -296,15 +296,15 @@ 22.262 It's element - list like ["1jcc", "A", "SSNAKIDQLSSDAQ", None] 22.263 SSNAKIDQLSSDAQ - part of alignment block sequence 22.264 Last parameter - model or None 22.265 - 22.266 + 22.267 22.268 Result: 22.269 returns list of core atom identifiers (first is 0) 22.270 22.271 Example: [0,1,2,3,4,6] 22.272 """ 22.273 - 22.274 - 22.275 + 22.276 + 22.277 22.278 22.279 # check lendth of input sequences 22.280 @@ -316,13 +316,13 @@ 22.281 print 'Sequences must have equal lengths' 22.282 print 'Task was aborted!' 
22.283 return 22.284 - 22.285 22.286 22.287 - 22.288 22.289 22.290 - 22.291 + 22.292 + 22.293 + 22.294 pdb_list = [] # list of uploading PDB names 22.295 22.296 for pdb_code, pdb_chain, seq_part, model in block: 22.297 @@ -330,12 +330,12 @@ 22.298 ID = self.to_ID(pdb_code, pdb_chain, model) 22.299 22.300 if not self.seq.has_key(ID) or not self.structure.has_key(ID): 22.301 - 22.302 + 22.303 # pdb_code = pdb_code.lower() 22.304 - 22.305 + 22.306 if pdb_chain: 22.307 pdb_chain = pdb_chain.upper() 22.308 - 22.309 + 22.310 pdb_list.append((pdb_code, pdb_chain, model)) 22.311 22.312 22.313 @@ -348,41 +348,41 @@ 22.314 for pdb_code, pdb_chain, seq_part, model in block: 22.315 22.316 ID = self.to_ID(pdb_code, pdb_chain, model) 22.317 - 22.318 - 22.319 + 22.320 + 22.321 22.322 22.323 22.324 if not self.seq.has_key(ID) or not self.structure.has_key(ID): # no structure :( 22.325 print 'Undefined structuce ' + ID 22.326 print 'Task was aborted!' 22.327 - return 22.328 + return 22.329 22.330 if not self.seq[ID] or not self.structure[ID]: # no structure :( 22.331 print 'Undefined structuce ' + ID 22.332 print 'Task was aborted!' 
22.333 return 22.334 - 22.335 22.336 22.337 22.338 - 22.339 22.340 -# absent = {} # [position identifier] = 1, if some structure has not this atom 22.341 + 22.342 + 22.343 +# absent = {} # [position identifier] = 1, if some structure has not this atom 22.344 22.345 # structure = {} # [structure name][position in block] = list [x,y,z] 22.346 - 22.347 + 22.348 IDs = [] 22.349 - 22.350 + 22.351 for pdb_code, pdb_chain, seq_part, model in block: 22.352 22.353 ID = self.to_ID(pdb_code, pdb_chain, model) 22.354 - 22.355 + 22.356 IDs.append(ID) 22.357 22.358 if self.structure1.has_key(ID): 22.359 continue 22.360 - 22.361 + 22.362 22.363 # alignment part of sequence with full sequence 22.364 connections = self.find_sequence_piece(self.seq[ID], seq_part) 22.365 @@ -393,7 +393,7 @@ 22.366 self.structure1[ID] = [] 22.367 22.368 22.369 - 22.370 + 22.371 for i in xrange(0, len(connections)): 22.372 if connections[i] == '-': # there is not required position in PDB 22.373 self.structure1[ID].append(None) 22.374 @@ -434,15 +434,15 @@ 22.375 22.376 # 22.377 # # save CA atom coordinates 22.378 -# self.structure1 = structure 22.379 +# self.structure1 = structure 22.380 # 22.381 - 22.382 + 22.383 return cores 22.384 22.385 22.386 22.387 22.388 - 22.389 + 22.390 22.391 22.392 22.393 @@ -465,7 +465,7 @@ 22.394 codes = [] # list of PDB codes 22.395 22.396 # get texts of each PDB 22.397 - 22.398 + 22.399 for pdb_code, pdb_chain, model in pdb_list: 22.400 if not codes.count(pdb_code): 22.401 codes.append(pdb_code) 22.402 @@ -475,53 +475,53 @@ 22.403 22.404 22.405 # check PDB file existance in folder ./pdb/ 22.406 - 22.407 + 22.408 fname = 'pdb/' + pdb_code 22.409 - 22.410 + 22.411 if pdb_code.lower().find('.ent') == -1 and pdb_code.lower().find('.pdb') == -1: 22.412 # pdb code 22.413 fname += '.ent' 22.414 - 22.415 - 22.416 + 22.417 + 22.418 if os.path.exists(fname): 22.419 self.pdb_text[pdb_code] = open(fname).readlines() 22.420 else: 22.421 - # try to upload from Internet 22.422 + # try 
to upload from Internet 22.423 try: 22.424 22.425 url = self.pdb_url.replace('XXXX', pdb_code) 22.426 - 22.427 + 22.428 self.pdb_text[pdb_code] = urllib2.urlopen(url).readlines() 22.429 22.430 - 22.431 + 22.432 except: 22.433 self.pdb_text[pdb_code] = '' 22.434 - 22.435 - 22.436 + 22.437 + 22.438 if self.save: 22.439 # save information 22.440 - 22.441 + 22.442 try: 22.443 - 22.444 - 22.445 + 22.446 + 22.447 # check folder "/pdb" existance 22.448 - 22.449 + 22.450 if not os.path.exists('pdb'): 22.451 - 22.452 + 22.453 # make folder 22.454 os.mkdir('pdb'); 22.455 - 22.456 - 22.457 + 22.458 + 22.459 pdbfile_to_save = open('pdb/' + pdb_code + '.ent', 'w') 22.460 pdbfile_to_save.write(''.join(self.pdb_text[pdb_code])) 22.461 pdbfile_to_save.close() 22.462 - 22.463 + 22.464 print 'PDB structure saved to file pdb/' + pdb_code + '.ent' 22.465 - 22.466 + 22.467 except: 22.468 - 22.469 + 22.470 print 'Can not write file pdb/' + pdb_code + '.ent' 22.471 - 22.472 + 22.473 22.474 if not self.pdb_text[pdb_code]: 22.475 #print self.pdb_url.replace('XXXX', pdb_code) 22.476 @@ -532,43 +532,43 @@ 22.477 # We have all PDB texts 22.478 22.479 22.480 - # read all texts and get CA atoms coordinates 22.481 + # read all texts and get CA atoms coordinates 22.482 22.483 for pdb_code, pdb_chain, model in pdb_list: 22.484 - 22.485 + 22.486 ID = self.to_ID(pdb_code, pdb_chain, model) 22.487 - 22.488 + 22.489 S = self.read_pdb(pdb_code, pdb_chain, self.pdb_text[pdb_code], model) 22.490 self.structure[ID], self.seq[ID] = S 22.491 22.492 22.493 if not self.seq[ID]: 22.494 print 'Can not find sequence for "' + ID + '"' 22.495 - return 22.496 - 22.497 + return 22.498 22.499 22.500 22.501 - 22.502 + 22.503 + 22.504 22.505 def read_pdb(self, pdb_code, pdb_chain, pdb_text, model): 22.506 """ 22.507 Reads pdb_text 22.508 - 22.509 + 22.510 Returns (structure, sequence) 22.511 structure (key - string like "1jcc:A") 22.512 This element is dict with [x,y,z] coordinates of each aa 22.513 sequence 22.514 """ 
22.515 22.516 - 22.517 + 22.518 22.519 structure = [] # dict with coordinates list 22.520 seq = '' # sequense (string) 22.521 - 22.522 - 22.523 + 22.524 + 22.525 Model_already = 0 # is it our model now 22.526 - 22.527 + 22.528 if not model: 22.529 Model_already = 1 22.530 22.531 @@ -577,52 +577,52 @@ 22.532 22.533 22.534 for currentline in pdb_text: 22.535 - 22.536 - 22.537 + 22.538 + 22.539 if not Model_already: 22.540 if currentline[0:5] == 'MODEL': 22.541 if currentline[10:14].strip() == model: 22.542 Model_already = 1 22.543 continue 22.544 - 22.545 + 22.546 continue 22.547 - 22.548 - 22.549 - 22.550 + 22.551 + 22.552 + 22.553 if currentline[0:6] == 'ENDMDL': 22.554 break 22.555 22.556 22.557 if len(currentline) < 54: 22.558 continue 22.559 - 22.560 - 22.561 + 22.562 + 22.563 22.564 if currentline[0:4] != "ATOM": 22.565 continue 22.566 - 22.567 - 22.568 + 22.569 + 22.570 atomType = currentline[12:16].strip() 22.571 22.572 if atomType != "CA": 22.573 continue 22.574 - 22.575 - 22.576 + 22.577 + 22.578 thisChain = currentline[21].strip() 22.579 22.580 if thisChain != pdb_chain: 22.581 continue 22.582 - 22.583 - 22.584 + 22.585 + 22.586 alterCode = currentline[16] # Alter code 22.587 - 22.588 + 22.589 if alterCode != ' ' and alterCode != 'A': 22.590 continue 22.591 22.592 - 22.593 + 22.594 thisAminoAcid = currentline[17:20].strip() 22.595 22.596 - 22.597 + 22.598 thisX = float(currentline[30:38].strip()) 22.599 thisY = float(currentline[38:46].strip()) 22.600 thisZ = float(currentline[46:54].strip()) 22.601 @@ -637,7 +637,7 @@ 22.602 seq = seq + AAdict[thisAminoAcid][0] 22.603 else: 22.604 seq = seq + 'x' 22.605 - 22.606 + 22.607 return (structure, seq) 22.608 22.609 22.610 @@ -645,35 +645,34 @@ 22.611 22.612 22.613 22.614 -### 22.615 -### 22.616 -### 22.617 +### 22.618 +### 22.619 +### 22.620 ### def add_pdb(self, pdb_code, pdb_chain, pdb_text, model): 22.621 -### 22.622 +### 22.623 ### """ 22.624 ### Method read PDB and get coordinates of CA atoms 22.625 -### 
22.626 +### 22.627 ### Arguments: 22.628 ### pdb_code - PDB name, for example "1jcc" 22.629 ### pdb_chain - chain, for example "A" 22.630 ### pdb_text - PDB file strings 22.631 -### 22.632 -### 22.633 +### 22.634 +### 22.635 ### Result: 22.636 ### New element will be added in dict self.structure (key - string like "1jcc:A") 22.637 ### This element is dict with [x,y,z] coordinates of each aa 22.638 -### 22.639 +### 22.640 ### """ 22.641 -### 22.642 +### 22.643 ### ID = self.to_ID(pdb_code, pdb_chain, model) 22.644 -### 22.645 +### 22.646 ### self.structure[ID], self.seq[ID] = read_pdb(pdb_code, pdb_chain, pdb_text, model) 22.647 -### 22.648 -### 22.649 -### 22.650 +### 22.651 +### 22.652 +### 22.653 22.654 22.655 - 22.656 22.657 22.658 22.659 @@ -689,8 +688,9 @@ 22.660 22.661 22.662 22.663 - 22.664 - 22.665 + 22.666 + 22.667 + 22.668 22.669 22.670 22.671 @@ -730,10 +730,10 @@ 22.672 22.673 22.674 # At first, calculate conformity without gapes 22.675 - 22.676 + 22.677 connections0 = [] # key - seq_part position, value - position in seq_part without gapes 22.678 part_usage = 0 # how many aa from seqpart were used 22.679 - 22.680 + 22.681 for i in xrange(0, len(seq_part)): 22.682 s = seq_part[i] 22.683 if s == '-': 22.684 @@ -741,15 +741,15 @@ 22.685 else: 22.686 connections0.append(part_usage) 22.687 part_usage += 1 22.688 - 22.689 - 22.690 22.691 22.692 22.693 22.694 22.695 22.696 - 22.697 + 22.698 + 22.699 + 22.700 22.701 connections1 = [] 22.702 22.703 @@ -769,7 +769,7 @@ 22.704 connections1.append('-') 22.705 # print 'Can not find', AA_part, len(connections1), 'of', seq_part, 'in', seq_all 22.706 continue 22.707 - 22.708 + 22.709 22.710 connections1.append(all_usage - 1) 22.711 22.712 @@ -788,10 +788,10 @@ 22.713 22.714 22.715 22.716 - 22.717 - 22.718 - 22.719 - 22.720 + 22.721 + 22.722 + 22.723 + 22.724 22.725 22.726 22.727 @@ -805,51 +805,51 @@ 22.728 22.729 """ 22.730 Calculates distances 22.731 - 22.732 + 22.733 adds new elements to self.d 22.734 """ 22.735 
22.736 22.737 # Atom count 22.738 aton_count = len(self.structure1.values()[0]) 22.739 - 22.740 + 22.741 for ID, structure in self.structure1.items(): 22.742 - 22.743 + 22.744 if self.d.has_key(ID): 22.745 continue # already done 22.746 - 22.747 + 22.748 self.d[ID] = [] 22.749 22.750 22.751 - # create distance matrix 22.752 + # create distance matrix 22.753 for atom1 in xrange(0, aton_count): 22.754 self.d[ID].append([]) 22.755 for atom2 in xrange(0, aton_count): 22.756 self.d[ID][atom1].append(None) 22.757 22.758 22.759 - # done distance matrix 22.760 - 22.761 + # done distance matrix 22.762 + 22.763 for atom1 in xrange(0, aton_count): 22.764 for atom2 in xrange(0, aton_count): 22.765 22.766 if atom2 == atom1: 22.767 break 22.768 - 22.769 + 22.770 if structure[atom1] and structure[atom2]: 22.771 22.772 dist = 0 22.773 22.774 for xyz in xrange(0, 3): 22.775 - 22.776 + 22.777 dist += (structure[atom1][xyz] - structure[atom2][xyz]) ** 2 22.778 22.779 dist = dist ** 0.5 22.780 22.781 self.d[ID][atom1][atom2] = dist 22.782 self.d[ID][atom2][atom1] = dist 22.783 - 22.784 - 22.785 + 22.786 + 22.787 22.788 22.789 22.790 @@ -865,12 +865,12 @@ 22.791 22.792 Arguments: 22.793 IDs - list of IDs to study 22.794 - 22.795 - 22.796 + 22.797 + 22.798 Result: 22.799 returns [alignment core graph, cost graf] 22.800 22.801 - 22.802 + 22.803 alignment core graph example: 22.804 graf[0][1] = 1 0 and 1 positions are connected 22.805 graf[0][1] = 0 0 and 1 positions are not connected 22.806 @@ -882,19 +882,19 @@ 22.807 22.808 graf = {} 22.809 22.810 - cost = {} # distance spreading 22.811 + cost = {} # distance spreading 22.812 22.813 - 22.814 - 22.815 - 22.816 - 22.817 + 22.818 + 22.819 + 22.820 + 22.821 for atom1 in xrange(0, aton_count): 22.822 22.823 - 22.824 + 22.825 graf[atom1] = {} 22.826 22.827 cost[atom1] = {} 22.828 - 22.829 + 22.830 for atom2 in xrange(0, aton_count): 22.831 22.832 22.833 @@ -902,26 +902,26 @@ 22.834 graf[atom1][atom2] = 1 22.835 break 22.836 22.837 - 22.838 + 
22.839 distances = [] 22.840 - 22.841 + 22.842 for ID in IDs: 22.843 distances.append(self.d[ID][atom1][atom2]) 22.844 22.845 - 22.846 + 22.847 if distances.count(None): 22.848 graf[atom1][atom2] = 0 22.849 graf[atom2][atom1] = 0 22.850 22.851 else: 22.852 - 22.853 + 22.854 spreading = max(distances) - min(distances) 22.855 22.856 if spreading > self.delta: 22.857 - 22.858 + 22.859 graf[atom1][atom2] = 0 22.860 graf[atom2][atom1] = 0 22.861 - 22.862 + 22.863 else: 22.864 graf[atom1][atom2] = 1 22.865 graf[atom2][atom1] = 1 22.866 @@ -929,7 +929,7 @@ 22.867 cost[atom1][atom2] = -spreading 22.868 cost[atom2][atom1] = -spreading 22.869 22.870 - 22.871 + 22.872 22.873 return [graf, cost] 22.874 22.875 @@ -966,7 +966,7 @@ 22.876 if self.alternative_core_new_atoms: 22.877 22.878 # compare this core with all added cores 22.879 - 22.880 + 22.881 for one_core in new_cores: 22.882 new_atoms = 0 22.883 22.884 @@ -980,15 +980,12 @@ 22.885 22.886 22.887 # if this core is good 22.888 - if ok == 1: 22.889 + if ok == 1: 22.890 new_cores.append(core) 22.891 22.892 22.893 22.894 return new_cores 22.895 - 22.896 - 22.897 - 22.898 22.899 22.900 22.901 @@ -1006,3 +1003,6 @@ 22.902 22.903 22.904 22.905 + 22.906 + 22.907 +
23.1 --- a/blocks3d/Kliki.py Mon Jan 24 21:40:10 2011 +0300 23.2 +++ b/blocks3d/Kliki.py Tue Jan 25 16:03:00 2011 +0300 23.3 @@ -34,14 +34,14 @@ 23.4 23.5 compsub - ?????? ??????? ??? ??????? ???? 23.6 """ 23.7 - 23.8 23.9 - 23.10 23.11 23.12 23.13 23.14 - 23.15 + 23.16 + 23.17 + 23.18 def __init__ (self, graf, cost = None, limit_count=0, min_size=0, timeout=10): 23.19 23.20 """ 23.21 @@ -57,7 +57,7 @@ 23.22 ?????? ???? ?????? ????? ??????????. 23.23 ??? ?????? ????, ??? ???? ??????? ??? ????? 23.24 ???????????? ??? fast_algorithm ? ??? ?????????? ???? 23.25 - 23.26 + 23.27 23.28 limit_count - ???????????? ????? ????, ??????? ????? 23.29 ???? ?????? 0, ?? ??????????? ??? ????? 23.30 @@ -65,15 +65,15 @@ 23.31 min_size - min size of returning klika 23.32 23.33 timeout - time in sec. for BRON-KERBOSH algorithm 23.34 - 23.35 + 23.36 """ 23.37 23.38 - 23.39 + 23.40 23.41 self.graf = graf 23.42 self.cost = cost 23.43 23.44 - 23.45 + 23.46 self.kliki = [] 23.47 23.48 self.timeout = timeout 23.49 @@ -95,40 +95,40 @@ 23.50 while deleted: 23.51 23.52 deleted = 0 23.53 - 23.54 + 23.55 for atom, c in connections.items(): 23.56 - 23.57 + 23.58 if c < min_size: 23.59 - 23.60 + 23.61 del connections[atom] 23.62 - 23.63 + 23.64 for atom1, connect in graf[atom].items(): 23.65 if connect == 1 and connections.has_key(atom1): 23.66 connections[atom1] -= 1 23.67 deleted = 1 23.68 23.69 - 23.70 - 23.71 - 23.72 + 23.73 + 23.74 + 23.75 23.76 bank_l = {} 23.77 - 23.78 + 23.79 for atom, c in connections.items(): 23.80 - 23.81 + 23.82 if not bank_l.has_key(c): 23.83 bank_l[c] = [] 23.84 - 23.85 + 23.86 bank_l[c].append(atom) 23.87 23.88 23.89 - keys = [] 23.90 - 23.91 + keys = [] 23.92 + 23.93 if len(bank_l.keys()): 23.94 for c in xrange(min(bank_l.keys()), max(bank_l.keys())+1): 23.95 if bank_l.has_key(c): 23.96 keys.extend(bank_l[c]) 23.97 - 23.98 - 23.99 + 23.100 + 23.101 23.102 23.103 # RUN BRON-KERBOSH 23.104 @@ -141,7 +141,7 @@ 23.105 self.fast_algorithm(keys[:]) # run fast algorithm 
23.106 23.107 23.108 - 23.109 + 23.110 23.111 23.112 # ?????????? ?????????? ????? ?? ???????? ????? ????????? ? ??? 23.113 @@ -154,7 +154,7 @@ 23.114 ## min_l=len(self.kliki[0]) # ??????????? ?????? 23.115 23.116 bank_l = {} 23.117 - 23.118 + 23.119 for klika in self.kliki: 23.120 klika.sort() 23.121 l = len(klika) # ????? ??????? ????? 23.122 @@ -173,12 +173,12 @@ 23.123 kliki=[] 23.124 23.125 #print self.cost 23.126 - 23.127 + 23.128 if len(bank_l.keys()): 23.129 - 23.130 + 23.131 r = range(min(bank_l.keys()), max(bank_l.keys())+1) 23.132 r.reverse() 23.133 - 23.134 + 23.135 for l in r: 23.136 if (bank_l.has_key(l)): 23.137 23.138 @@ -191,7 +191,7 @@ 23.139 23.140 # ??????????? ?? ?? ???????? ????? ???? cost 23.141 23.142 - 23.143 + 23.144 23.145 costs = [] 23.146 23.147 @@ -200,7 +200,7 @@ 23.148 c = 0 23.149 23.150 for i in klika: 23.151 - 23.152 + 23.153 if not self.cost.has_key(i): 23.154 continue 23.155 23.156 @@ -209,7 +209,7 @@ 23.157 if j == i: 23.158 break 23.159 23.160 - 23.161 + 23.162 if not self.cost[i].has_key(j): 23.163 continue 23.164 23.165 @@ -233,15 +233,15 @@ 23.166 del costs[n] 23.167 23.168 k = k1 23.169 - 23.170 + 23.171 kliki.extend(k) 23.172 - 23.173 + 23.174 ## kliki.reverse() 23.175 23.176 if limit_count: 23.177 if len(kliki) > limit_count: # ??????? ??????????? ?? ????? ???? 23.178 kliki = kliki[:limit_count] 23.179 - 23.180 + 23.181 23.182 self.kliki = kliki[:] 23.183 23.184 @@ -249,10 +249,10 @@ 23.185 23.186 23.187 23.188 - 23.189 - 23.190 23.191 - 23.192 + 23.193 + 23.194 + 23.195 23.196 def bron_kerbosh (self, keys): 23.197 23.198 @@ -273,9 +273,9 @@ 23.199 23.200 23.201 print 'Bron and Kerbosh algorithm started' 23.202 - 23.203 + 23.204 start_time = time.time() 23.205 - 23.206 + 23.207 # ????... 23.208 while 1: 23.209 23.210 @@ -284,14 +284,14 @@ 23.211 23.212 23.213 23.214 - 23.215 + 23.216 23.217 # ???????? candidates ? used ?? ?????? 
23.218 23.219 #print depth 23.220 - 23.221 + 23.222 candidates = list_candidates[depth][:] 23.223 - used = list_used[depth][:] 23.224 + used = list_used[depth][:] 23.225 23.226 23.227 23.228 @@ -301,25 +301,25 @@ 23.229 depth -= 1 23.230 23.231 if compsub: 23.232 - compsub.pop() 23.233 + compsub.pop() 23.234 continue 23.235 23.236 - 23.237 23.238 23.239 - 23.240 - 23.241 + 23.242 + 23.243 + 23.244 # ? used ?? ???????? ???????, ??????????? ?? ????? ????????? ?? candidates 23.245 # (??? ?? used ?? ????????? ???? ?? ? 1 ?? candidates) 23.246 23.247 used_candidates = 0 23.248 - 23.249 + 23.250 for used1 in used: 23.251 for candidates1 in candidates: 23.252 if self.graf[used1][candidates1] == 0: 23.253 break 23.254 else: 23.255 - used_candidates = 1 23.256 + used_candidates = 1 23.257 23.258 if used_candidates: 23.259 depth -= 1 23.260 @@ -327,8 +327,8 @@ 23.261 if compsub: 23.262 compsub.pop() 23.263 continue 23.264 - 23.265 - 23.266 + 23.267 + 23.268 23.269 23.270 23.271 @@ -341,7 +341,7 @@ 23.272 23.273 23.274 23.275 - 23.276 + 23.277 # ????????? new_candidates ? new_used, ?????? ?? candidates ? used ???????, ?? ?????????? ? v 23.278 # (?? ????, ???????? ?????? ?????????? ? v) 23.279 new_candidates = [] 23.280 @@ -349,7 +349,7 @@ 23.281 if self.graf[candidates1][v] == 1 and candidates1 != v: 23.282 new_candidates.append(candidates1) 23.283 23.284 - 23.285 + 23.286 new_used = [] 23.287 for used1 in used: 23.288 if self.graf[used1][v] == 1 and used1 != v: 23.289 @@ -358,7 +358,7 @@ 23.290 23.291 23.292 23.293 - # ??????? v ?? candidates ? ???????? ? used 23.294 + # ??????? v ?? candidates ? ???????? ? used 23.295 del list_candidates[depth][0] 23.296 list_used[depth].append(v) 23.297 23.298 @@ -367,33 +367,33 @@ 23.299 if len(new_candidates) == 0 and len(new_used) == 0: 23.300 # compsub ? ????? 23.301 self.kliki.append(compsub[:]) 23.302 - 23.303 + 23.304 else: 23.305 # ????? ?????????? ???????? 
bron_kerbosh(new_candidates, new_used) 23.306 23.307 depth += 1 23.308 - 23.309 + 23.310 23.311 # TIMEOUT check start 23.312 if self.timeout != -1: 23.313 - 23.314 + 23.315 if time.time() - start_time > self.timeout: 23.316 23.317 self.kliki = [] 23.318 return 23.319 # TIMEOUT check end 23.320 23.321 - 23.322 - 23.323 - 23.324 + 23.325 + 23.326 + 23.327 if depth >= len(list_candidates): 23.328 list_candidates.append([]) 23.329 list_used.append([]) 23.330 23.331 - 23.332 + 23.333 list_candidates[depth] = new_candidates[:] 23.334 list_used[depth] = new_used[:] 23.335 - 23.336 + 23.337 continue 23.338 23.339 23.340 @@ -442,7 +442,7 @@ 23.341 23.342 if not excluded.has_key(i): 23.343 keys1.append(i) 23.344 - 23.345 + 23.346 if len(keys1) == 0: 23.347 break 23.348 23.349 @@ -454,9 +454,9 @@ 23.350 connections = {} # index - atom, value - connections value 23.351 23.352 for i in keys1: 23.353 - 23.354 + 23.355 connections[i] = 0 23.356 - 23.357 + 23.358 for j in keys1: 23.359 23.360 if i != j and self.graf[i][j]: 23.361 @@ -466,8 +466,8 @@ 23.362 if max(connections.values()) == min(connections.values()): 23.363 # all atoms are equal 23.364 break 23.365 - 23.366 - 23.367 + 23.368 + 23.369 exclude_connect = min(connections.values()) # excluded atoms connections 23.370 23.371 23.372 @@ -479,7 +479,7 @@ 23.373 for i in keys1: 23.374 23.375 cost_sum[i] = 0 23.376 - 23.377 + 23.378 if connections[i] == exclude_connect: 23.379 23.380 for j in keys1: 23.381 @@ -495,13 +495,13 @@ 23.382 keys2 = [] 23.383 23.384 for i in keys1: 23.385 - 23.386 + 23.387 if connections[i] == exclude_connect: 23.388 23.389 if cost_sum[i] == exclude_cost: 23.390 23.391 continue 23.392 - 23.393 + 23.394 keys2.append(i) 23.395 23.396 keys1 = clon(keys2) 23.397 @@ -511,11 +511,11 @@ 23.398 keys2 = [] 23.399 23.400 for i in keys1: 23.401 - 23.402 + 23.403 if connections[i] == exclude_connect: 23.404 23.405 continue 23.406 - 23.407 + 23.408 keys2.append(i) 23.409 23.410 keys1 = clon(keys2) 23.411 @@ 
-546,22 +546,22 @@ 23.412 23.413 23.414 break 23.415 - 23.416 - 23.417 + 23.418 + 23.419 else: 23.420 # no new atoms 23.421 break 23.422 - 23.423 - 23.424 + 23.425 + 23.426 23.427 # keys1 is klika 23.428 23.429 self.kliki.append(keys1[:]) 23.430 23.431 - 23.432 23.433 - 23.434 - 23.435 - 23.436 23.437 - 23.438 + 23.439 + 23.440 + 23.441 + 23.442 +
24.1 --- a/blocks3d/clon.py Mon Jan 24 21:40:10 2011 +0300 24.2 +++ b/blocks3d/clon.py Tue Jan 25 16:03:00 2011 +0300 24.3 @@ -10,9 +10,9 @@ 24.4 24.5 24.6 if t == list or t == tuple: 24.7 - 24.8 + 24.9 r = [] 24.10 - 24.11 + 24.12 for i in obj: 24.13 r.append(clon(i)) 24.14 24.15 @@ -23,9 +23,9 @@ 24.16 24.17 24.18 if t == dict: 24.19 - 24.20 + 24.21 r = {} 24.22 - 24.23 + 24.24 for key, value in obj.items(): 24.25 r[key] = clon(value) 24.26 24.27 @@ -34,5 +34,5 @@ 24.28 24.29 return obj 24.30 24.31 - 24.32 - 24.33 + 24.34 +
25.1 --- a/blocks3d/wt/blocks3d-wt.pro Mon Jan 24 21:40:10 2011 +0300 25.2 +++ b/blocks3d/wt/blocks3d-wt.pro Tue Jan 25 16:03:00 2011 +0300 25.3 @@ -1,6 +1,5 @@ 25.4 TARGET = blocks3d-wt.exe 25.5 25.6 -SOURCES += config.C 25.7 SOURCES += mktemp_string.C 25.8 SOURCES += blocks3d-wt.C 25.9 SOURCES += blocks3d-wt-widget.C 25.10 @@ -21,4 +20,4 @@ 25.11 } 25.12 25.13 INCLUDEPATH = /usr/local/include/Wt/ 25.14 -LIBS += -lwthttp -lwt 25.15 +LIBS += -lwthttp -lwt -lboost_signals
26.1 --- a/blocks3d/wt/compile.sh Mon Jan 24 21:40:10 2011 +0300 26.2 +++ b/blocks3d/wt/compile.sh Tue Jan 25 16:03:00 2011 +0300 26.3 @@ -1,4 +1,5 @@ 26.4 -cd build; 26.5 +mkdir -p build 26.6 +cd build 26.7 qmake ../blocks3d-wt.pro && make VERBOSE=0 26.8 #~ cmake .. && make VERBOSE=1 26.9
27.1 --- a/blocks3d/wt/config.C Mon Jan 24 21:40:10 2011 +0300 27.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 27.3 @@ -1,36 +0,0 @@ 27.4 - 27.5 -#include "config.h" 27.6 -#include <boost/format.hpp> 27.7 -#include <string> 27.8 - 27.9 - 27.10 -namespace Malakite 27.11 -{ 27.12 - 27.13 -const char* python_path = "python"; 27.14 -const char* blocks3d_path = "/home/boris/my-code/Blocks3D/Blocks3D_v5/Blocks3D.py"; 27.15 -const char* temp_path = "/tmp"; 27.16 -const char* temp_mkstemp_path = "/tmp/XXXXXX"; 27.17 - 27.18 -const char* main_url = "/"; 27.19 - 27.20 -//~ const char* tempfile_alignment = "in_aln.fasta"; 27.21 -//~ const char* tempfile_conformity = "in_conf.fasta"; 27.22 -//~ const char* tempfile_html = "out.html"; 27.23 -//~ const char* tempfile_check = "check.lock"; 27.24 -const char* tempfile_sh = "run.sh"; 27.25 - 27.26 -namespace defaults 27.27 -{ 27.28 - 27.29 -const char* delta = "2.0"; 27.30 -const char* min_block_width = "3"; 27.31 -const char* timeout = "10"; 27.32 -const char* timeout2 = "10"; 27.33 - 27.34 -} 27.35 - 27.36 - 27.37 - 27.38 -} 27.39 -
28.1 --- a/blocks3d/wt/config.h Mon Jan 24 21:40:10 2011 +0300 28.2 +++ b/blocks3d/wt/config.h Tue Jan 25 16:03:00 2011 +0300 28.3 @@ -3,34 +3,34 @@ 28.4 28.5 namespace Malakite 28.6 { 28.7 - 28.8 + 28.9 const int refresh_period = 5*1000; 28.10 - 28.11 const unsigned int max_simultaneous_tasks = 100; 28.12 28.13 -extern const char* python_path; 28.14 -extern const char* blocks3d_path; 28.15 -extern const char* temp_path; 28.16 -extern const char* temp_mkstemp_path; 28.17 +const char* const python_path = "python"; 28.18 +const char* const blocks3d_path = "/home/boris/my-code/Blocks3D/Blocks3D_v5/Blocks3D.py"; 28.19 +const char* const temp_path = "/tmp"; 28.20 +const char* const temp_mkstemp_path = "/tmp/XXXXXX"; 28.21 28.22 -extern const char* main_url; 28.23 +const char* const main_url = "/"; 28.24 28.25 -//~ extern const char* tempfile_alignment; 28.26 -//~ extern const char* tempfile_conformity; 28.27 -//~ extern const char* tempfile_html; 28.28 -//~ extern const char* tempfile_check; 28.29 -extern const char* tempfile_sh; 28.30 +const char* const tempfile_sh = "run.sh"; 28.31 28.32 namespace defaults 28.33 { 28.34 28.35 -extern const char* delta; 28.36 -extern const char* min_block_width; 28.37 -extern const char* timeout; 28.38 -extern const char* timeout2; 28.39 +const char* const delta = "2.0"; 28.40 +const char* const min_block_width = "3"; 28.41 +const char* const timeout = "10"; 28.42 +const char* const timeout2 = "10"; 28.43 28.44 } 28.45 28.46 + 28.47 + 28.48 } 28.49 28.50 + 28.51 + 28.52 + 28.53 #endif // MALAKITE_BLOCKS3D_CONFIG_H_s
29.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 29.2 +++ b/debian/changelog Tue Jan 25 16:03:00 2011 +0300 29.3 @@ -0,0 +1,15 @@ 29.4 + -- Danya Alexeyevsky <dendik@kodomo.fbb.msu.ru> Mon, 22 Nov 2010 14:03:22 +0300 29.5 + 29.6 +allpy (0.0-3) unstable; urgency=medium 29.7 + 29.8 + * Fixed syntax errors in source to soothe postinst scripts. 29.9 + 29.10 + -- Danya Alexeyevsky <dendik@kodomo.fbb.msu.ru> Mon, 22 Nov 2010 13:43:59 +0300 29.11 + 29.12 +allpy (0.0-2) unstable; urgency=medium 29.13 + 29.14 + * Initial debianization. 29.15 + 29.16 + -- Danya Alexeyevsky <dendik@kodomo.fbb.msu.ru> Mon, 15 Nov 2010 17:40:37 +0300 29.17 + 29.18 +vim: set ft=debchangelog et ai:
30.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 30.2 +++ b/debian/compat Tue Jan 25 16:03:00 2011 +0300 30.3 @@ -0,0 +1,1 @@ 30.4 +7
31.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 31.2 +++ b/debian/control.in Tue Jan 25 16:03:00 2011 +0300 31.3 @@ -0,0 +1,16 @@ 31.4 +Source: allpy 31.5 +Section: science 31.6 +Priority: optional 31.7 +Maintainer: Danya Alexeyevsky <dendik@kodomo.fbb.msu.ru> 31.8 +Standards-Version: 3.6.1 31.9 +Build-Depends: @cdbs@ 31.10 + 31.11 +Package: python-allpy 31.12 +Architecture: all 31.13 +Depends: python (>> 2.5) 31.14 +Description: Python library for working with alignments 31.15 + 31.16 +Package: geometrical-core 31.17 +Architecture: all 31.18 +Depends: python-allpy 31.19 +Description: Find geometrical core in a multiple alignment
32.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 32.2 +++ b/debian/geometrical-core.install Tue Jan 25 16:03:00 2011 +0300 32.3 @@ -0,0 +1,1 @@ 32.4 +geometrical_core/geometrical-core usr/bin
33.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 33.2 +++ b/debian/rules Tue Jan 25 16:03:00 2011 +0300 33.3 @@ -0,0 +1,8 @@ 33.4 +#!/usr/bin/make -f 33.5 +DEB_AUTO_UPDATE_DEBIAN_CONTROL = yes 33.6 +DEB_PYTHON_SYSTEM = pysupport 33.7 +include /usr/share/cdbs/1/rules/debhelper.mk 33.8 +include /usr/share/cdbs/1/class/python-distutils.mk 33.9 + 33.10 +DEB_COMPRESS_EXCLUDE := .py 33.11 +DEB_DESTDIR = debian/python-allpy
34.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 34.2 +++ b/docs/Makefile Tue Jan 25 16:03:00 2011 +0300 34.3 @@ -0,0 +1,89 @@ 34.4 +# Makefile for Sphinx documentation 34.5 +# 34.6 + 34.7 +# You can set these variables from the command line. 34.8 +SPHINXOPTS = 34.9 +SPHINXBUILD = sphinx-build 34.10 +PAPER = 34.11 +BUILDDIR = build 34.12 + 34.13 +# Internal variables. 34.14 +PAPEROPT_a4 = -D latex_paper_size=a4 34.15 +PAPEROPT_letter = -D latex_paper_size=letter 34.16 +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 34.17 + 34.18 +.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest 34.19 + 34.20 +help: 34.21 + @echo "Please use \`make <target>' where <target> is one of" 34.22 + @echo " html to make standalone HTML files" 34.23 + @echo " dirhtml to make HTML files named index.html in directories" 34.24 + @echo " pickle to make pickle files" 34.25 + @echo " json to make JSON files" 34.26 + @echo " htmlhelp to make HTML files and a HTML help project" 34.27 + @echo " qthelp to make HTML files and a qthelp project" 34.28 + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 34.29 + @echo " changes to make an overview of all changed/added/deprecated items" 34.30 + @echo " linkcheck to check all external links for integrity" 34.31 + @echo " doctest to run all doctests embedded in the documentation (if enabled)" 34.32 + 34.33 +clean: 34.34 + -rm -rf $(BUILDDIR)/* 34.35 + 34.36 +html: 34.37 + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 34.38 + @echo 34.39 + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 34.40 + 34.41 +dirhtml: 34.42 + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 34.43 + @echo 34.44 + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
34.45 + 34.46 +pickle: 34.47 + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 34.48 + @echo 34.49 + @echo "Build finished; now you can process the pickle files." 34.50 + 34.51 +json: 34.52 + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 34.53 + @echo 34.54 + @echo "Build finished; now you can process the JSON files." 34.55 + 34.56 +htmlhelp: 34.57 + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 34.58 + @echo 34.59 + @echo "Build finished; now you can run HTML Help Workshop with the" \ 34.60 + ".hhp project file in $(BUILDDIR)/htmlhelp." 34.61 + 34.62 +qthelp: 34.63 + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 34.64 + @echo 34.65 + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 34.66 + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 34.67 + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/allpy.qhcp" 34.68 + @echo "To view the help file:" 34.69 + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/allpy.qhc" 34.70 + 34.71 +latex: 34.72 + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 34.73 + @echo 34.74 + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 34.75 + @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ 34.76 + "run these through (pdf)latex." 34.77 + 34.78 +changes: 34.79 + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 34.80 + @echo 34.81 + @echo "The overview file is in $(BUILDDIR)/changes." 34.82 + 34.83 +linkcheck: 34.84 + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 34.85 + @echo 34.86 + @echo "Link check complete; look for any errors in the above output " \ 34.87 + "or in $(BUILDDIR)/linkcheck/output.txt." 34.88 + 34.89 +doctest: 34.90 + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 34.91 + @echo "Testing of doctests in the sources finished, look at the " \ 34.92 + "results in $(BUILDDIR)/doctest/output.txt."
35.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 35.2 +++ b/docs/source/allpy/base.rst Tue Jan 25 16:03:00 2011 +0300 35.3 @@ -0,0 +1,12 @@ 35.4 +Base Documentation 35.5 +================== 35.6 + 35.7 +This page contains the Base Module documentation. 35.8 + 35.9 +The :mod:`base` Module 35.10 +---------------------- 35.11 + 35.12 +.. automodule:: allpy.base 35.13 + :members: 35.14 + :undoc-members: 35.15 + :show-inheritance:
36.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 36.2 +++ b/docs/source/allpy/data.rst Tue Jan 25 16:03:00 2011 +0300 36.3 @@ -0,0 +1,5 @@ 36.4 +Data Documentation 36.5 +================== 36.6 + 36.7 +This page contains the Data Package documentation. 36.8 +
37.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 37.2 +++ b/docs/source/allpy/dna.rst Tue Jan 25 16:03:00 2011 +0300 37.3 @@ -0,0 +1,12 @@ 37.4 +Dna Documentation 37.5 +================= 37.6 + 37.7 +This page contains the Dna Module documentation. 37.8 + 37.9 +The :mod:`dna` Module 37.10 +--------------------- 37.11 + 37.12 +.. automodule:: allpy.dna 37.13 + :members: 37.14 + :undoc-members: 37.15 + :show-inheritance:
38.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 38.2 +++ b/docs/source/allpy/fasta.rst Tue Jan 25 16:03:00 2011 +0300 38.3 @@ -0,0 +1,12 @@ 38.4 +Fasta Documentation 38.5 +=================== 38.6 + 38.7 +This page contains the Fasta Module documentation. 38.8 + 38.9 +The :mod:`fasta` Module 38.10 +----------------------- 38.11 + 38.12 +.. automodule:: allpy.fasta 38.13 + :members: 38.14 + :undoc-members: 38.15 + :show-inheritance:
39.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 39.2 +++ b/docs/source/allpy/graph.rst Tue Jan 25 16:03:00 2011 +0300 39.3 @@ -0,0 +1,12 @@ 39.4 +Graph Documentation 39.5 +=================== 39.6 + 39.7 +This page contains the Graph Module documentation. 39.8 + 39.9 +The :mod:`graph` Module 39.10 +----------------------- 39.11 + 39.12 +.. automodule:: allpy.graph 39.13 + :members: 39.14 + :undoc-members: 39.15 + :show-inheritance:
40.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 40.2 +++ b/docs/source/allpy/modules.rst Tue Jan 25 16:03:00 2011 +0300 40.3 @@ -0,0 +1,19 @@ 40.4 +Project Documentation 40.5 +===================== 40.6 + 40.7 +This page contains the Project Modules documentation. 40.8 + 40.9 +Modules: 40.10 +-------- 40.11 + 40.12 +.. toctree:: 40.13 + :maxdepth: 4 40.14 + 40.15 + base 40.16 + data 40.17 + dna 40.18 + fasta 40.19 + graph 40.20 + pdb 40.21 + protein 40.22 + rna
41.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 41.2 +++ b/docs/source/allpy/pdb.rst Tue Jan 25 16:03:00 2011 +0300 41.3 @@ -0,0 +1,12 @@ 41.4 +Pdb Documentation 41.5 +================= 41.6 + 41.7 +This page contains the Pdb Module documentation. 41.8 + 41.9 +The :mod:`pdb` Module 41.10 +--------------------- 41.11 + 41.12 +.. automodule:: allpy.pdb 41.13 + :members: 41.14 + :undoc-members: 41.15 + :show-inheritance:
42.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 42.2 +++ b/docs/source/allpy/protein.rst Tue Jan 25 16:03:00 2011 +0300 42.3 @@ -0,0 +1,12 @@ 42.4 +Protein Documentation 42.5 +===================== 42.6 + 42.7 +This page contains the Protein Module documentation. 42.8 + 42.9 +The :mod:`protein` Module 42.10 +------------------------- 42.11 + 42.12 +.. automodule:: allpy.protein 42.13 + :members: 42.14 + :undoc-members: 42.15 + :show-inheritance:
43.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 43.2 +++ b/docs/source/allpy/rna.rst Tue Jan 25 16:03:00 2011 +0300 43.3 @@ -0,0 +1,12 @@ 43.4 +Rna Documentation 43.5 +================= 43.6 + 43.7 +This page contains the Rna Module documentation. 43.8 + 43.9 +The :mod:`rna` Module 43.10 +--------------------- 43.11 + 43.12 +.. automodule:: allpy.rna 43.13 + :members: 43.14 + :undoc-members: 43.15 + :show-inheritance:
44.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 44.2 +++ b/docs/source/conf.py Tue Jan 25 16:03:00 2011 +0300 44.3 @@ -0,0 +1,194 @@ 44.4 +# -*- coding: utf-8 -*- 44.5 +# 44.6 +# allpy documentation build configuration file, created by 44.7 +# sphinx-quickstart on Mon Dec 13 22:14:23 2010. 44.8 +# 44.9 +# This file is execfile()d with the current directory set to its containing dir. 44.10 +# 44.11 +# Note that not all possible configuration values are present in this 44.12 +# autogenerated file. 44.13 +# 44.14 +# All configuration values have a default; values that are commented out 44.15 +# serve to show the default. 44.16 + 44.17 +import sys, os 44.18 + 44.19 +# If extensions (or modules to document with autodoc) are in another directory, 44.20 +# add these directories to sys.path here. If the directory is relative to the 44.21 +# documentation root, use os.path.abspath to make it absolute, like shown here. 44.22 +#sys.path.append(os.path.abspath('.')) 44.23 + 44.24 +# -- General configuration ----------------------------------------------------- 44.25 + 44.26 +# Add any Sphinx extension module names here, as strings. They can be extensions 44.27 +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 44.28 +extensions = ['sphinx.ext.autodoc'] 44.29 + 44.30 +# Add any paths that contain templates here, relative to this directory. 44.31 +templates_path = ['_templates'] 44.32 + 44.33 +# The suffix of source filenames. 44.34 +source_suffix = '.rst' 44.35 + 44.36 +# The encoding of source files. 44.37 +#source_encoding = 'utf-8' 44.38 + 44.39 +# The master toctree document. 44.40 +master_doc = 'index' 44.41 + 44.42 +# General information about the project. 44.43 +project = u'allpy' 44.44 +copyright = u'2010, many people' 44.45 + 44.46 +# The version info for the project you're documenting, acts as replacement for 44.47 +# |version| and |release|, also used in various other places throughout the 44.48 +# built documents. 
44.49 +# 44.50 +# The short X.Y version. 44.51 +version = '0.0' 44.52 +# The full version, including alpha/beta/rc tags. 44.53 +release = '0.0' 44.54 + 44.55 +# The language for content autogenerated by Sphinx. Refer to documentation 44.56 +# for a list of supported languages. 44.57 +#language = None 44.58 + 44.59 +# There are two options for replacing |today|: either, you set today to some 44.60 +# non-false value, then it is used: 44.61 +#today = '' 44.62 +# Else, today_fmt is used as the format for a strftime call. 44.63 +#today_fmt = '%B %d, %Y' 44.64 + 44.65 +# List of documents that shouldn't be included in the build. 44.66 +#unused_docs = [] 44.67 + 44.68 +# List of directories, relative to source directory, that shouldn't be searched 44.69 +# for source files. 44.70 +exclude_trees = [] 44.71 + 44.72 +# The reST default role (used for this markup: `text`) to use for all documents. 44.73 +#default_role = None 44.74 + 44.75 +# If true, '()' will be appended to :func: etc. cross-reference text. 44.76 +#add_function_parentheses = True 44.77 + 44.78 +# If true, the current module name will be prepended to all description 44.79 +# unit titles (such as .. function::). 44.80 +#add_module_names = True 44.81 + 44.82 +# If true, sectionauthor and moduleauthor directives will be shown in the 44.83 +# output. They are ignored by default. 44.84 +#show_authors = False 44.85 + 44.86 +# The name of the Pygments (syntax highlighting) style to use. 44.87 +pygments_style = 'sphinx' 44.88 + 44.89 +# A list of ignored prefixes for module index sorting. 44.90 +#modindex_common_prefix = [] 44.91 + 44.92 + 44.93 +# -- Options for HTML output --------------------------------------------------- 44.94 + 44.95 +# The theme to use for HTML and HTML Help pages. Major themes that come with 44.96 +# Sphinx are currently 'default' and 'sphinxdoc'. 44.97 +html_theme = 'default' 44.98 + 44.99 +# Theme options are theme-specific and customize the look and feel of a theme 44.100 +# further. 
For a list of options available for each theme, see the 44.101 +# documentation. 44.102 +#html_theme_options = {} 44.103 + 44.104 +# Add any paths that contain custom themes here, relative to this directory. 44.105 +#html_theme_path = [] 44.106 + 44.107 +# The name for this set of Sphinx documents. If None, it defaults to 44.108 +# "<project> v<release> documentation". 44.109 +#html_title = None 44.110 + 44.111 +# A shorter title for the navigation bar. Default is the same as html_title. 44.112 +#html_short_title = None 44.113 + 44.114 +# The name of an image file (relative to this directory) to place at the top 44.115 +# of the sidebar. 44.116 +#html_logo = None 44.117 + 44.118 +# The name of an image file (within the static path) to use as favicon of the 44.119 +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 44.120 +# pixels large. 44.121 +#html_favicon = None 44.122 + 44.123 +# Add any paths that contain custom static files (such as style sheets) here, 44.124 +# relative to this directory. They are copied after the builtin static files, 44.125 +# so a file named "default.css" will overwrite the builtin "default.css". 44.126 +html_static_path = ['_static'] 44.127 + 44.128 +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 44.129 +# using the given strftime format. 44.130 +#html_last_updated_fmt = '%b %d, %Y' 44.131 + 44.132 +# If true, SmartyPants will be used to convert quotes and dashes to 44.133 +# typographically correct entities. 44.134 +#html_use_smartypants = True 44.135 + 44.136 +# Custom sidebar templates, maps document names to template names. 44.137 +#html_sidebars = {} 44.138 + 44.139 +# Additional templates that should be rendered to pages, maps page names to 44.140 +# template names. 44.141 +#html_additional_pages = {} 44.142 + 44.143 +# If false, no module index is generated. 44.144 +#html_use_modindex = True 44.145 + 44.146 +# If false, no index is generated. 
44.147 +#html_use_index = True 44.148 + 44.149 +# If true, the index is split into individual pages for each letter. 44.150 +#html_split_index = False 44.151 + 44.152 +# If true, links to the reST sources are added to the pages. 44.153 +#html_show_sourcelink = True 44.154 + 44.155 +# If true, an OpenSearch description file will be output, and all pages will 44.156 +# contain a <link> tag referring to it. The value of this option must be the 44.157 +# base URL from which the finished HTML is served. 44.158 +#html_use_opensearch = '' 44.159 + 44.160 +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). 44.161 +#html_file_suffix = '' 44.162 + 44.163 +# Output file base name for HTML help builder. 44.164 +htmlhelp_basename = 'allpydoc' 44.165 + 44.166 + 44.167 +# -- Options for LaTeX output -------------------------------------------------- 44.168 + 44.169 +# The paper size ('letter' or 'a4'). 44.170 +#latex_paper_size = 'letter' 44.171 + 44.172 +# The font size ('10pt', '11pt' or '12pt'). 44.173 +#latex_font_size = '10pt' 44.174 + 44.175 +# Grouping the document tree into LaTeX files. List of tuples 44.176 +# (source start file, target name, title, author, documentclass [howto/manual]). 44.177 +latex_documents = [ 44.178 + ('index', 'allpy.tex', u'allpy Documentation', 44.179 + u'many people', 'manual'), 44.180 +] 44.181 + 44.182 +# The name of an image file (relative to this directory) to place at the top of 44.183 +# the title page. 44.184 +#latex_logo = None 44.185 + 44.186 +# For "manual" documents, if this is true, then toplevel headings are parts, 44.187 +# not chapters. 44.188 +#latex_use_parts = False 44.189 + 44.190 +# Additional stuff for the LaTeX preamble. 44.191 +#latex_preamble = '' 44.192 + 44.193 +# Documents to append as an appendix to all manuals. 44.194 +#latex_appendices = [] 44.195 + 44.196 +# If false, no module index is generated. 44.197 +#latex_use_modindex = True
45.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 45.2 +++ b/docs/source/index.rst Tue Jan 25 16:03:00 2011 +0300 45.3 @@ -0,0 +1,22 @@ 45.4 +.. allpy documentation master file, created by 45.5 + sphinx-quickstart on Mon Dec 13 22:14:23 2010. 45.6 + You can adapt this file completely to your liking, but it should at least 45.7 + contain the root `toctree` directive. 45.8 + 45.9 +Welcome to allpy's documentation! 45.10 +================================= 45.11 + 45.12 +Contents: 45.13 + 45.14 +.. toctree:: 45.15 + :maxdepth: 2 45.16 + 45.17 + allpy/modules 45.18 + 45.19 +Indices and tables 45.20 +================== 45.21 + 45.22 +* :ref:`genindex` 45.23 +* :ref:`modindex` 45.24 +* :ref:`search` 45.25 +
#!/usr/bin/python
# Repository path: geometrical_core/geometrical-core
"""
Geometrical Core building tool
version 2.0

Reads an alignment (-i), asks allpy for its geometrical cores and writes
them out as a position list (-l), as a FASTA file holding the input
sequences followed by one save_xstring() record per core (-f), and/or as an
MSF file (-g) produced from that FASTA output by the external ``seqret``
program.
"""

import argparse
import os
import subprocess
import sys
from tempfile import NamedTemporaryFile


def _convert(string, convert, kind):
    """Return convert(string); report a failed conversion as an argparse error.

    kind -- word used in the error message ('float' or 'integer').
    """
    try:
        return convert(string)
    except ValueError:
        raise argparse.ArgumentTypeError("%r is wrong %s" % (string, kind))


def f_nng(string):
    """ Validates nonnegative (>=0) float """
    value = _convert(string, float, 'float')
    if value < 0:
        raise argparse.ArgumentTypeError("%r is negative" % string)
    return value


def part(string):
    """ Validates 0.0 <= float <= 1.0 """
    value = _convert(string, float, 'float')
    if not (0.0 <= value <= 1.0):
        msg = "%r is not float, representing part, ie in [0, 1]" % string
        raise argparse.ArgumentTypeError(msg)
    return value


def timeout(string):
    """ Validates int >= -1 """
    value = _convert(string, int, 'integer')
    if value < -1:
        raise argparse.ArgumentTypeError("integer %r is less than -1" % string)
    return value


def pos(string):
    """ Validates positive integer """
    value = _convert(string, int, 'integer')
    if value < 1:
        raise argparse.ArgumentTypeError("%r is not positive integer" % string)
    return value


def i_nng(string):
    """ Validates int >= 0 """
    value = _convert(string, int, 'integer')
    if value < 0:
        raise argparse.ArgumentTypeError("integer %r is less than 0" % string)
    return value


def build_parser(defaults):
    """Return the command-line parser of the tool.

    defaults -- object whose attributes delta, minsize, timeout,
                ac_new_atoms and ac_count supply the option defaults
                (main() passes allpy.config).

    Note that argparse.FileType opens the files while parsing, so every
    output file named on the command line is created/truncated immediately.
    """
    readable = argparse.FileType('r')
    writable = argparse.FileType('w')
    parser = argparse.ArgumentParser(
        description='Geometrical Core building tool.',
        epilog='''1) Distance spreading [angstrom]
2) -1 timeout means running Bron-Kerbosh algorithm without timeout
3) Alternative core new aa part: read documentation for more information
4) Superposition core identifier: main core is 0, first alternative is 1 etc. ''',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    add = parser.add_argument
    add('-v', '--version', action='version', version='%(prog)s 2.0')
    add('-i', help='Input alignment file', metavar='FILE', type=readable,
        required=True)
    add('-c', help='PDB names conformity file', metavar='FILE', type=readable)
    add('-l', help='Output list file', metavar='FILE', type=writable)
    add('-f', help='Output fasta file', metavar='FILE', type=writable)
    add('-g', help='Output msf file', metavar='FILE', type=writable)
    add('-p', help='Output pdb file', metavar='FILE', type=writable)
    add('-s', help='Output spt file', metavar='FILE', type=writable)
    add('-d', help='Distance spreading', metavar='DELTA', type=f_nng,
        default=defaults.delta)
    add('-m', help='Min core size', metavar='MIN_SIZE', type=pos,
        default=defaults.minsize)
    add('-t', help='Bron-Kerbosh algorithm timeout', type=timeout,
        default=defaults.timeout)
    add('-n', help='Alternative core new aa part', type=part,
        default=defaults.ac_new_atoms)
    # NOTE(review): metavar NEW_ATOMS looks like it was meant for -n above;
    # kept as is so the --help output does not change.
    add('-a', help='Cores count', metavar='NEW_ATOMS', type=pos,
        default=defaults.ac_count)
    add('-x', help='Superposition core identifier', type=i_nng, default=0)
    return parser


def _write_core_list(out, input_name, cores):
    """Write the positions of every core to the open file *out* and close it."""
    out.write('Geometrical core positions for alignment %s' % input_name)
    out.write('\n\n')
    out.write('First alignment position is 0')
    for number, core in enumerate(cores):
        out.write('\n\n')
        if number == 0:
            out.write('Geometrical core:')
        else:
            out.write('Alternative geometrical core %i:' % number)
        out.write('\n')
        out.write(', '.join(str(n) for n in core.positions))
    out.close()


def _write_fasta(out, sequences, cores):
    """Copy the input sequences to *out*, append one record per core, close."""
    sequences.seek(0)
    out.write(sequences.read())
    for number, core in enumerate(cores):
        out.write('\n\n')
        if number == 0:
            core.save_xstring(out, 'GC', 'Main geometrical core')
        else:
            core.save_xstring(out, 'AGC_%i' % number,
                              'Alternative geometrical core %i' % number)
    out.close()


def main(argv=None):
    """Run the tool.

    argv -- list of command-line arguments; None means sys.argv[1:].

    Exits with status 1 when the options are inconsistent, when no core is
    found or when any step raises; the message is printed to stdout.
    """
    # Imported here rather than at module level so that the validators and
    # build_parser() stay importable without allpy being installed.
    from allpy import config, alignment, block

    args = build_parser(config).parse_args(argv)

    # NOTE(review): -p/-s are only checked for consistency and -c/-x are only
    # parsed; nothing in this script writes pdb/spt output or reads -c/-x.
    if not (args.l or args.f or args.g or args.p or args.s):
        print('Error: no output file provided')
        sys.exit(1)
    if bool(args.p) != bool(args.s):
        print('Error: provide both pdb and spt file or none of them')
        sys.exit(1)

    # Remembered up front: args.i is re-pointed at a temporary file below.
    input_name = args.i.name
    tmp_name = None
    try:
        try:
            aln = alignment.Alignment(args.i)
        except Exception:
            # The input could not be read directly; presumably it is in
            # another format (the original code calls it msf), so let
            # seqret convert it into a temporary file and retry.
            args.i.close()
            tmp_file = NamedTemporaryFile(delete=False)
            tmp_file.close()
            tmp_name = tmp_file.name
            # Argument list instead of a shell string: file names with
            # spaces or shell metacharacters are passed through verbatim.
            subprocess.call(['seqret', input_name, tmp_name])
            args.i = open(tmp_name)
            aln = alignment.Alignment(args.i)

        cores = block.Block(aln).geometrical_cores(
            max_delta=args.d, timeout=args.t, minsize=args.m,
            ac_new_atoms=args.n, ac_count=args.a)
        if not cores:
            print('No cores! Try to change parameters')
            sys.exit(1)

        if args.l:
            _write_core_list(args.l, input_name, cores)

        # MSF output is derived from FASTA output; with -g alone the FASTA
        # text is first written into the -g file itself.
        if args.g and not args.f:
            args.f = args.g
        if args.f:
            _write_fasta(args.f, args.i, cores)
        if args.g:
            args.g.close()
            subprocess.call(['seqret', args.f.name, 'msf::' + args.g.name])
    except Exception as err:
        print(err)
        sys.exit(1)
    finally:
        # Runs on success, on errors and on sys.exit(), so the converted
        # copy of the input never outlives the process.
        if tmp_name is not None:
            args.i.close()
            os.unlink(tmp_name)


if __name__ == '__main__':
    main()
47.1 --- a/geometrical_core/geometrical_core.py Mon Jan 24 21:40:10 2011 +0300 47.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 47.3 @@ -1,181 +0,0 @@ 47.4 -""" 47.5 -Geometrical Core building tool 47.6 -version 2.0 47.7 -""" 47.8 - 47.9 -from allpy.lib import config, project, block 47.10 -Block = block.Block 47.11 -Project = project.Project 47.12 -import argparse 47.13 -import os 47.14 -from tempfile import NamedTemporaryFile 47.15 - 47.16 -r = argparse.FileType('r') 47.17 -w = argparse.FileType('w') 47.18 -c = config 47.19 - 47.20 -def f_nng(string): 47.21 - """ Validates nonnegative (>=0) float """ 47.22 - try: 47.23 - value = float(string) 47.24 - except: 47.25 - msg = "%r is wrong float" % string 47.26 - raise argparse.ArgumentTypeError(msg) 47.27 - if value < 0: 47.28 - msg = "%r is negative" % string 47.29 - raise argparse.ArgumentTypeError(msg) 47.30 - return value 47.31 - 47.32 -def part(string): 47.33 - """ Validates 0.0 <= float <= 1.0 """ 47.34 - try: 47.35 - value = float(string) 47.36 - except: 47.37 - msg = "%r is wrong float" % string 47.38 - raise argparse.ArgumentTypeError(msg) 47.39 - if not (0.0 <= value <= 1.0): 47.40 - msg = "%r is not float, representing part, ie in [0, 1]" % string 47.41 - raise argparse.ArgumentTypeError(msg) 47.42 - return value 47.43 - 47.44 -def timeout(string): 47.45 - """ Validates int >= -1 """ 47.46 - try: 47.47 - value = int(string) 47.48 - except: 47.49 - msg = "%r is wrong integer" % string 47.50 - raise argparse.ArgumentTypeError(msg) 47.51 - if value < -1: 47.52 - msg = "integer %r is less than -1" % string 47.53 - raise argparse.ArgumentTypeError(msg) 47.54 - return value 47.55 - 47.56 -def pos(string): 47.57 - """ Validates positive integer """ 47.58 - try: 47.59 - value = int(string) 47.60 - except: 47.61 - msg = "%r is wrong integer" % string 47.62 - raise argparse.ArgumentTypeError(msg) 47.63 - if value < 1: 47.64 - msg = "%r is not positive integer" % string 47.65 - raise argparse.ArgumentTypeError(msg) 
47.66 - return value 47.67 - 47.68 -def i_nng(string): 47.69 - """ Validates int >= 0 """ 47.70 - try: 47.71 - value = int(string) 47.72 - except: 47.73 - msg = "%r is wrong integer" % string 47.74 - raise argparse.ArgumentTypeError(msg) 47.75 - if value < 0: 47.76 - msg = "integer %r is less than 0" % string 47.77 - raise argparse.ArgumentTypeError(msg) 47.78 - return value 47.79 - 47.80 -p = argparse.ArgumentParser( 47.81 -description='Geometrical Core building tool.', 47.82 -epilog='''1) Distance spreading [angstrom] 47.83 -2) -1 timeout means running Bron-Kerbosh algorithm without timeout 47.84 -3) Alternative core new aa part: read documentation for more information 47.85 -4) Superposition core identifier: main core is 0, first alternative is 1 etc. ''', 47.86 -formatter_class=argparse.ArgumentDefaultsHelpFormatter, 47.87 -#~ argument_default=argparse.SUPPRESS, 47.88 -) 47.89 - 47.90 -p.add_argument('-v','--version',action='version',version='%(prog)s 2.0') 47.91 -p.add_argument('-i',help='Input alignment file',metavar='FILE',type=r,required=True) 47.92 -p.add_argument('-c',help='PDB names conformity file',metavar='FILE',type=r) 47.93 -p.add_argument('-l',help='Output list file',metavar='FILE',type=w) 47.94 -p.add_argument('-f',help='Output fasta file',metavar='FILE',type=w) 47.95 -p.add_argument('-g',help='Output msf file',metavar='FILE',type=w) 47.96 -p.add_argument('-p',help='Output pdb file',metavar='FILE',type=w) 47.97 -p.add_argument('-s',help='Output spt file',metavar='FILE',type=w) 47.98 -p.add_argument('-d',help='Distance spreading',metavar='DELTA',type=f_nng,default=c.delta) 47.99 -p.add_argument('-m',help='Min core size',metavar='MIN_SIZE',type=pos,default=c.minsize) 47.100 -p.add_argument('-t',help='Bron-Kerbosh algorithm timeout',type=timeout,default=c.timeout) 47.101 -p.add_argument('-n',help='Alternative core new aa part',type=part,default=c.ac_new_atoms) 47.102 -p.add_argument('-a',help='Cores 
count',metavar='NEW_ATOMS',type=pos,default=c.ac_count) 47.103 -p.add_argument('-x',help='Superposition core identifier',type=i_nng,default=0) 47.104 - 47.105 -tmp_file = None 47.106 - 47.107 -try: 47.108 - args = p.parse_args() 47.109 - 47.110 - if not args.l and not args.f and not args.g and not args.p and not args.s: 47.111 - print 'Error: no output file provided' 47.112 - exit() 47.113 - if not (args.p and args.s) and not (not args.p and not args.s): 47.114 - print 'Error: provide both pdb and spt file or none of them' 47.115 - exit() 47.116 - 47.117 - try: 47.118 - project = Project(args.i) 47.119 - except: 47.120 - args.i.close() 47.121 - tmp_file = NamedTemporaryFile(delete=False) 47.122 - tmp_file.close() 47.123 - os.system('seqret %(msf)s %(fasta)s' % \ 47.124 - {'msf': args.i.name, 'fasta': tmp_file.name}) 47.125 - args.i = open(tmp_file.name) 47.126 - project = Project(args.i) 47.127 - 47.128 - project.pdb_auto_add(args.c) 47.129 - block = Block(project) 47.130 - GCs = block.geometrical_cores(max_delta=args.d, timeout=args.t, 47.131 - minsize=args.t, ac_new_atoms=args.n, ac_count=args.a) 47.132 - 47.133 - if not GCs: 47.134 - print 'No cores! 
Try to change parameters' 47.135 - exit() 47.136 - 47.137 - if args.l: 47.138 - l = args.l 47.139 - 47.140 - l.write('Geometrical core positions for alignment %s' % args.i.name) 47.141 - l.write('\n\n') 47.142 - l.write('First alignment position is 0') 47.143 - 47.144 - for i, GC in enumerate(GCs): 47.145 - l.write('\n\n') 47.146 - if i == 0: 47.147 - l.write('Geometrical core:') 47.148 - else: 47.149 - l.write('Alternative geometrical core %i:' % i) 47.150 - l.write('\n') 47.151 - l.write(', '.join(str(n) for n in GC.positions)) 47.152 - l.close() 47.153 - 47.154 - if args.g and not args.f: 47.155 - args.f = args.g 47.156 - 47.157 - if args.f: 47.158 - args.i.seek(0) 47.159 - f = args.f 47.160 - f.write(args.i.read()) # write sequences 47.161 - 47.162 - # write GCs 47.163 - for i, GC in enumerate(GCs): 47.164 - f.write('\n\n') 47.165 - if i == 0: 47.166 - GC.save_xstring(f, 'GC', 'Main geometrical core') 47.167 - else: 47.168 - GC.save_xstring(f, 'AGC_%i' % i, 'Alternative geometrical core %i' % i) 47.169 - f.close() 47.170 - 47.171 - 47.172 - if args.g: 47.173 - args.g.close() 47.174 - os.system('seqret %(fasta)s msf::%(msf)s' % \ 47.175 - {'fasta': args.f.name, 'msf': args.g.name}) 47.176 - 47.177 - 47.178 -except Exception, t: 47.179 - print t 47.180 - exit() 47.181 - 47.182 -if tmp_file: 47.183 - os.unlink(tmp_file) 47.184 -
48.1 --- a/lib/allpy_data/AAdict.py Mon Jan 24 21:40:10 2011 +0300 48.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 48.3 @@ -1,1200 +0,0 @@ 48.4 - 48.5 -""" 48.6 -Dict: 48.7 - key: 3-letter code from PDB 48.8 - value: (1-letter code, type, is_modified, None) 48.9 - type: 48.10 - p - protein 48.11 - d - dna 48.12 - r - rna 48.13 - is_modified: "M" or None 48.14 - None 48.15 - name 48.16 -""" 48.17 - 48.18 -AAdict = { 48.19 -"CPI": ('x', 'p', None, None, '6-CARBOXYPIPERIDINE'), 48.20 -"AGM": ('r', 'p', 'M', None, '5-METHYL-ARGININE'), 48.21 -"G25": ('g', 'r', 'M', None, '"GUANOSINE-5\'-MONOPHOSPHATE"'), 48.22 -"CPN": ('c', 'd', 'M', None, '2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-CYTOSINE'), 48.23 -"MA6": ('a', 'r', 'M', None, '"6N-DIMETHYLADENOSINE-5\'-MONOPHOSHATE"'), 48.24 -"NYS": ('c', 'p', 'M', None, 'S-{5-[(1R)-2-AMINO-1-HYDROXYETHYL]-2,3-DIHYDROXYPHENYL}-L-CYSTEINE'), 48.25 -"CPC": ('x', 'p', None, None, '"2-METHYL-1-METHYLAMINO-CYCLOPROPANE'), 48.26 -"8FG": ('g', 'd', 'M', None, '"N-(5\'-PHOSPHO-2\'-DEOXYGUANOSIN-8-YL)-2-ACETYLAMINOFLUORENE"'), 48.27 -"SIC": ('x', 'p', None, None, '"(2R)-2-[(3S)-3-AMINO-2,5-DIOXOPYRROLIDIN-1-YL]-3-SULFANYLPROPANOIC'), 48.28 -"PQ1": ('n', 'r', None, None, '"PHOSPHORIC'), 48.29 -"PAS": ('d', 'p', 'M', None, '"2-AMINO-4-OXO-4-PHOSPHONOOXY-BUTYRIC'), 48.30 -"EOV": ('x', 'p', None, None, '(3S,6S,9S,12R,15S,18S,21S,24S,30S,33S)-30-ethyl-33-[(1R,2R,4E)-1-hydroxy-2-methylhex-4-en-1-yl]-1,4,7,10,12,15,19,25,28-nonamethyl-3,21-bis(1-methylethyl)-6,9,18,24-tetrakis(2-methylpropyl)-1,4,7,10,13,16,19,22,25,28,31-undecaazacyclotritriacontane-2,5,8,11,14,17,20,23,26,29,32-undecone'), 48.31 -"B2I": ('i', 'p', 'M', None, '"ISOLEUCINE'), 48.32 -"NYM": ('n', 'd', None, None, "3'-DEOXY-3'-AMINOTHYMIDINE MONOPHOSPHATE"), 48.33 -"URD": ('u', 'r', 'M', None, '1-((2R,3R,4S,5R)-TETRAHYDRO-3,4-DIHYDROXY-5-(HYDROXYMETHYL)FURAN-2-YL)PYRIDINE-2,4(1H,3H)-DIONE'), 48.34 -"NYC": ('x', 'p', None, None, 
'"[(4E)-2-[(1R,2S)-1-AMINO-2-HYDROXYPROPYL]-4-(1H-INDOL-3-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.35 -"AGT": ('x', 'p', None, None, 'S-{(S)-AMINO[(4-AMINOBUTYL)AMINO]METHYL}-L-CYSTEINE'), 48.36 -"NYG": ('x', 'p', None, None, '"[(4Z)-2-[(1S)-1,3-DIAMINO-3-OXOPROPYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.37 -"CPV": ('x', 'p', None, None, '"5-AMINO-6-CYCLOHEXYL-4-HYDROXY-2-ISOPROPYL-HEXANOIC'), 48.38 -"TAV": ('d', 'p', 'M', None, '"N-METHYL-N-{2-[(2-NAPHTHYLSULFONYL)AMINO]-5-[(2-NAPHTHYLSULFONYL)OXY]BENZOYL}-L-ASPARTIC'), 48.39 -"S4G": ('g', 'd', 'M', None, "2'-deoxy-4'-thioguanosine 5'-(dihydrogen phosphate)"), 48.40 -"MA7": ('a', 'd', 'M', None, '"1N-METHYLADENOSINE-5\'-MONOPHOSPHATE"'), 48.41 -"S4C": ('c', 'r', 'M', None, '"4\'-THIO-4\'-DEOXY-CYTOSINE-5\'-MONOPHOSPHATE"'), 48.42 -"UAR": ('u', 'r', 'M', None, "URACIL ARABINOSE-5'-PHOSPHATE"), 48.43 -"SPT": ('t', 'd', 'M', None, "5'-THIO-THYMIDINE PHOSPHONIC ACID"), 48.44 -"S4U": ('u', 'r', 'M', None, '"4-THIOURIDINE-5\'-PHOSPHATE"'), 48.45 -"TAF": ('t', 'd', 'M', None, "2'-DEOXY-2'-FLUORO-ARABINO-FURANOSYL THYMINE-5'-PHOSPHATE"), 48.46 -"5NC": ('c', 'd', 'M', None, '"5-AZA-CYTIDINE-5\'MONOPHOSPHATE"'), 48.47 -"CSF": ('c', 'r', 'M', None, "CYTIDINE-5'-MONOPHOSPHATE-3-FLUORO-N-ACETYL-NEURAMINIC ACID"), 48.48 -"0CS": ('a', 'p', 'M', None, '3-[(S)-HYDROPEROXYSULFINYL]-L-ALANINE'), 48.49 -"B2V": ('v', 'p', 'M', None, '"VALINE'), 48.50 -"TAL": ('n', 'r', None, None, '9-(6-DEOXY-ALPHA-L-TALOFURANOSYL)-6-METHYLPURINE'), 48.51 -"SCY": ('c', 'p', 'M', None, 'S-ACETYL-CYSTEINE'), 48.52 -"KYQ": ('k', 'p', 'M', None, '(E)-N~6~-(1-CARBOXY-2-HYDROXYETHYLIDENE)-L-LYSINE'), 48.53 -"CS8": ('n', 'r', None, None, '"3-THIAOCTANOYL-COENZYME'), 48.54 -"TA4": ('x', 'p', None, None, '"(S)-2-[4-(AMINOMETHYL)-1H-1,2,3-TRIAZOL-1-YL]-4-METHYLPENTANOIC'), 48.55 -"ALN": ('a', 'p', 'M', None, 'NAPHTHALEN-2-YL-3-ALANINE'), 48.56 -"GS": ('g', 'd', 'M', None, 
'"GUANOSINE-5\'-THIO-MONOPHOSPHATE"'), 48.57 -"2FE": ('n', 'd', None, None, '"2\'-FLUORO-2\'-DEOXY-1,N6-ETHENOADENINE"'), 48.58 -"VAL": ('V', 'p', None, None, 'VALINE'), 48.59 -"7DA": ('a', 'd', 'M', None, '"7-DEAZA-2\'-DEOXYADENOSINE-5\'-MONOPHOSPHATE"'), 48.60 -"XPC": ('x', 'p', None, None, '"(3S,4R)-4-AMINOPYRROLIDINE-3-CARBOXYLIC'), 48.61 -"VAF": ('v', 'p', 'M', None, 'METHYLVALINE'), 48.62 -"VAD": ('v', 'p', 'M', None, 'DEAMINOHYDROXYVALINE'), 48.63 -"BGM": ('g', 'd', 'M', None, '"8-BROMO-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'), 48.64 -"P": ('g', 'd', 'M', None, "2'-DEOXY-N1,N2-PROPANO GUANOSINE MONOPHOSPHATE"), 48.65 -"LHU": ('u', 'r', 'M', None, '"L-URIDINE-5\'-MONOPHOSPHATE"'), 48.66 -"T48": ('t', 'd', 'M', None, "6'-ALPHA-HYDROXY CARBOCYCLIC THYMIDINE 5'-MONOPHOSPHATE"), 48.67 -"2GT": ('t', 'd', 'M', None, "2'-O-PROPARGYL THYMIDINE-5'-MONOPHOSPHATE"), 48.68 -"HMF": ('a', 'p', 'M', None, '"2-AMINO-4-PHENYL-BUTYRIC'), 48.69 -"BNN": ('a', 'p', 'M', None, 'ACETYL-P-AMIDINOPHENYLALANINE'), 48.70 -"AHB": ('n', 'p', 'M', None, 'BETA-HYDROXYASPARAGINE'), 48.71 -"NPH": ('c', 'p', 'M', None, 'CYSTEINE-METHYLENE-CARBAMOYL-1,10-PHENANTHROLINE'), 48.72 -"SIB": ('c', 'p', 'M', None, '"(2S)-2-AMINO-4-({[(2S,3S,4R,5R)-3,4-DIHYDROXY-5-(6-OXO-1,6-DIHYDRO-9H-PURIN-9-YL)TETRAHYDROFURAN-2-YL]METHYL}THIO)BUTANOIC'), 48.73 -"SNN": ('n', 'p', 'M', None, 'L-3-AMINOSUCCINIMIDE'), 48.74 -"AHH": ('x', 'p', None, None, '"AMINO-HYDROXYHEPTANOIC'), 48.75 -"JW5": ('n', 'r', None, None, "6-(HYDROXYMETHYL)URIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 48.76 -"PBB": ('c', 'p', 'M', None, 'S-(4-BROMOBENZYL)CYSTEINE'), 48.77 -"PTR": ('y', 'p', 'M', None, 'O-PHOSPHOTYROSINE'), 48.78 -"MAA": ('a', 'p', 'M', None, 'N-METHYLALANINE'), 48.79 -"AHS": ('x', 'p', None, None, '"(3-AMINO-4-CYCLOHEXYL-2-HYDROXY-BUTYL)-ISOBUTYL-CARBAMIC'), 48.80 -"AHP": ('a', 'p', 'M', None, '"2-AMINO-HEPTANOIC'), 48.81 -"MAD": ('a', 'r', 'M', None, '"6-HYDRO-1-METHYLADENOSINE-5\'-MONOPHOSPHATE"'), 48.82 -"AHT": ('x', 'p', None, 
None, '4-(2,5-DIAMINO-5-HYDROXY-PENTYL)-PHENOL'), 48.83 -"XYG": ('x', 'p', None, None, '"[(4Z)-2-[(1Z)-ETHANIMIDOYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.84 -"MAI": ('r', 'p', 'M', None, 'DEOXO-METHYLARGININE'), 48.85 -"CP1": ('c', 'd', 'M', None, '2-(METHYLAMINO)-ETHYLGLYCINE-CARBONYLMETHYLENE-CYTOSINE'), 48.86 -"G2L": ('g', 'r', 'M', None, '"3\'-O-METHYOXYETHYL-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.87 -"FFD": ('n', 'd', None, None, '(1R)-1,4-ANHYDRO-2-DEOXY-1-(3-FLUOROPHENYL)-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL'), 48.88 -"CML": ('c', 'p', 'M', None, '"(2S)-2-{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}BUTANEDIOIC'), 48.89 -"CMH": ('c', 'p', 'M', None, 'S-(METHYLMERCURY)-L-CYSTEINE'), 48.90 -"ZDU": ('n', 'd', None, None, '"5-(3-AMINOPROPYL)-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'), 48.91 -"PCS": ('f', 'p', 'M', None, 'PHENYLALANYLMETHYLCHLORIDE'), 48.92 -"CME": ('c', 'p', 'M', None, 'S,S-(2-HYDROXYETHYL)THIOCYSTEINE'), 48.93 -"CSO": ('c', 'p', 'M', None, 'S-HYDROXYCYSTEINE'), 48.94 -"193": ('x', 'p', None, None, '"(2S)-4-(BETA-ALANYLAMINO)-2-AMINOBUTANOIC'), 48.95 -"1TY": ('y', 'p', 'M', None, '3-{(3E)-4-HYDROXY-6-OXO-3-[(2-PHENYLETHYL)IMINO]CYCLOHEXA-1,4-DIEN-1-YL}ALANINE'), 48.96 -"CB2": ('c', 'd', 'M', None, '"PHOSPHORIC'), 48.97 -"CS3": ('c', 'p', 'M', None, 'S-[3-OXO-3-(2-THIENYL)PROPYL]-L-CYSTEINE'), 48.98 -"64T": ('t', 'd', 'M', None, '"5-HYDROXY-THYMIDINE-5\'-MONOPHOSPHATE"'), 48.99 -"4AC": ('n', 'r', None, None, '"N(4)-ACETYLCYTIDINE-5\'-MONOPHOSPHATE"'), 48.100 -"B3X": ('n', 'p', 'M', None, '"(3S)-3,5-DIAMINO-5-OXOPENTANOIC'), 48.101 -"TST": ('x', 'p', None, None, '"4-METHYL-2-{[4-(TOLUENE-4-SULFONYL)-THIOMORPHOLINE-3-CARBONYL]-AMINO}-PENTANOIC'), 48.102 -"CMT": ('c', 'p', 'M', None, 'O-METHYLCYSTEINE'), 48.103 -"CMR": ('c', 'd', 'M', None, '"2\'-DEOXY-CYTIDINE-5\'-RP-MONOMETHYLPHOSPHONATE"'), 48.104 -"CSH": ('x', 'p', None, None, '"[2-(2-HYDROXY-1-METHYL-ETHYL)-4-(1H-IMIDAZOL-4-YLMETHYL)-5-OXO-IMIDAZOLIDIN-1-YL]-ACETIC'), 48.105 
-"VLM": ('x', 'p', None, None, 'VALINYLAMINE'), 48.106 -"G": ('G', 'r', None, None, '"GUANOSINE-5\'-MONOPHOSPHATE"'), 48.107 -"C4S": ('c', 'd', 'M', None, "2'-deoxy-4'-thiocytidine 5'-(dihydrogen phosphate)"), 48.108 -"U2L": ('u', 'r', 'M', None, "5-METHYL-3'-O-METHOXYETHYL URIDINE-5'-MONOPHOSPHATE"), 48.109 -"LSO": ('x', 'p', None, None, '(Z)-N~6~-(3-CARBOXY-1-{[(4-CARBOXY-2-OXOBUTYL)SULFONYL]METHYL}PROPYLIDENE)-L-LYSINE'), 48.110 -"U2N": ('n', 'd', None, None, "2'-AMINO-2'-DEOXYURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 48.111 -"PLE": ('l', 'p', 'M', None, '"LEUCINE'), 48.112 -"SIN": ('x', 'p', None, None, '"SUCCINIC'), 48.113 -"M2L": ('k', 'p', 'M', None, '"(2R)-2-AMINO-3-(2-DIMETHYLAMINOETHYLSULFANYL)PROPANOIC'), 48.114 -"I58": ('k', 'p', 'M', None, '4R-FLUORO-N6-ETHANIMIDOYL-L-LYSINE'), 48.115 -"U2P": ('u', 'r', 'M', None, '"PHOSPHORIC'), 48.116 -"B2A": ('a', 'p', 'M', None, '"ALANINE'), 48.117 -"TTM": ('t', 'd', 'M', None, '"N3-ETHYL-THYMIDINE-5\'-MONOPHOSPHATE"'), 48.118 -"CSD": ('c', 'p', 'M', None, '3-SULFINOALANINE'), 48.119 -"M2G": ('g', 'r', 'M', None, '"N2-DIMETHYLGUANOSINE-5\'-MONOPHOSPHATE"'), 48.120 -"ASX": ('x', 'p', None, None, '"ASP/ASN'), 48.121 -"AFF": ('n', 'd', None, None, '2-ACETYLAMINOFLUORENE-3-YL'), 48.122 -"TTD": ('t', 'd', 'M', None, '"CIS-SYN'), 48.123 -"AET": ('a', 'r', 'M', None, '"N-[N-(9-B-D-RIBOFURANOSYLPURIN-6-YL)METHYLCARBAMOYL]THREONINE-5\'-MONOPHOSPHATE"'), 48.124 -"C45": ('c', 'd', 'M', None, '"N4-METHOXY-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.125 -"C46": ('c', 'd', 'M', None, '"6H,8H-3,4-DIHYDROPYRIMIDO[4,5-C][1,2]OXAZIN-7-0NE(CYTIDINE)-5\'-MONOPHOSPHATE"'), 48.126 -"FTR": ('w', 'p', 'M', None, 'FLUOROTRYPTOPHANE'), 48.127 -"BLY": ('k', 'p', 'M', None, '"LYSINE'), 48.128 -"C42": ('c', 'd', 'M', None, '"3\'-AMINO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.129 -"C2S": ('c', 'd', 'M', None, '"CYTIDINE-5\'-DITHIOPHOSPHORATE"'), 48.130 -"FTY": ('y', 'p', 'M', None, 'DEOXY-DIFLUOROMETHELENE-PHOSPHOTYROSINE'), 48.131 -"CSA": ('c', 
'p', 'M', None, 'S-ACETONYLCYSTEINE'), 48.132 -"I5C": ('c', 'd', 'M', None, '"5-IODO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.133 -"ALY": ('k', 'p', 'M', None, 'N(6)-ACETYLLYSINE'), 48.134 -"P1L": ('c', 'p', 'M', None, 'S-PALMITOYL-L-CYSTEINE'), 48.135 -"C49": ('c', 'd', 'M', None, '"4-THIO,5-FLUORO,5-METHYL-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.136 -"DSP": ('d', 'p', 'M', None, '"D-ASPARTIC'), 48.137 -"AFA": ('n', 'p', 'M', None, 'N-[7-METHYL-OCT-2,4-DIENOYL]ASPARAGINE'), 48.138 -"P1P": ('n', 'r', None, None, '3-{[OXIDO(OXO)PHOSPHINO]OXY}PROPAN-1-OLATE'), 48.139 -"U25": ('u', 'r', 'M', None, '"URIDINE-5\'-MONOPHOSPHATE"'), 48.140 -"CHS": ('x', 'p', None, None, '"4-AMINO-5-CYCLOHEXYL-3-HYDROXY-PENTANOIC'), 48.141 -"BLE": ('l', 'p', 'M', None, '"LEUCINE'), 48.142 -"DSG": ('n', 'p', 'M', None, 'D-ASPARAGINE'), 48.143 -"DSE": ('s', 'p', 'M', None, 'N-METHYL-D-SERINE'), 48.144 -"S4A": ('a', 'd', 'M', None, "2'-deoxy-4'-thioadenosine 5'-(dihydrogen phosphate)"), 48.145 -"DLS": ('k', 'p', 'M', None, 'DI-ACETYL-LYSINE'), 48.146 -"4SU": ('u', 'r', 'M', None, '"4-THIOURIDINE-5\'-MONOPHOSPHATE"'), 48.147 -"MA": ('a', 'p', 'M', None, '"METHYL'), 48.148 -"TRX": ('w', 'p', 'M', None, '6-HYDROXYTRYPTOPHAN'), 48.149 -"DC": ('C', 'd', None, None, '"2\'-DEOXYCYTIDINE-5\'-MONOPHOSPHATE"'), 48.150 -"HLU": ('l', 'p', 'M', None, 'BETA-HYDROXYLEUCINE'), 48.151 -"HSE": ('s', 'p', 'M', None, 'L-HOMOSERINE'), 48.152 -"A5M": ('c', 'r', 'M', None, '"2\'-AMINE-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.153 -"ALS": ('a', 'p', 'M', None, '"2-AMINO-3-OXO-4-SULFO-BUTYRIC'), 48.154 -"CBR": ('c', 'd', 'M', None, '"5-BROMO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.155 -"SHC": ('c', 'p', 'M', None, 'S-HEXYLCYSTEINE'), 48.156 -"B2F": ('f', 'p', 'M', None, '"PHENYLALANINE'), 48.157 -"QUO": ('g', 'r', 'M', None, '"2-AMINO-7-DEAZA-(2\'\',3\'\'-DIHYDROXY-CYCLOPENTYLAMINO)-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.158 -"X4A": ('n', 'd', None, None, '"[(2R,3S,5S)-2,3,5-TRIHYDROXYTETRAHYDROFURAN-2-YL]METHYL'), 
48.159 -"BE2": ('x', 'p', None, None, '"2-AMINOBENZOIC'), 48.160 -"CM0": ('n', 'r', None, None, "5-(CARBOXYMETHOXY) URIDINE-5'-MONOPHOSPHATE"), 48.161 -"GLH": ('q', 'p', 'M', None, 'N-5-CYCLOHEXYL-N-5-[(CYCLOHEXYLAMINO)CARBONYL]GLUTAMINE'), 48.162 -"DOH": ('d', 'p', 'M', None, '"BETA-HYDROXY'), 48.163 -"DON": ('l', 'p', 'M', None, '6-DIAZENYL-5-OXO-L-NORLEUCINE'), 48.164 -"GLM": ('x', 'p', None, None, '1-AMINO-PROPAN-2-ONE'), 48.165 -"GLN": ('Q', 'p', None, None, 'GLUTAMINE'), 48.166 -"T4S": ('t', 'd', 'M', None, '1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-4-HYDROSELENO-5-METHYLPYRIMIDIN-2(1H)-ONE'), 48.167 -"DOC": ('c', 'd', 'M', None, '"2\',3\'-DIDEOXYCYTIDINE-5\'-MONOPHOSPHATE"'), 48.168 -"T3P": ('t', 'd', 'M', None, '"THYMIDINE-3\'-PHOSPHATE"'), 48.169 -"DOA": ('x', 'p', None, None, '"12-AMINO-DODECANOIC'), 48.170 -"TOX": ('w', 'p', 'M', None, '2-AMINO-3-(1-HYDROPEROXY-1H-INDOL-3-YL)PROPAN-1-OL'), 48.171 -"SMP": ('a', 'd', 'M', None, '"2\'-DEOXY-ADENOSINE-5\'-SP-MONOMETHYLPHOSPHONATE"'), 48.172 -"GLX": ('x', 'p', None, None, '"GLU/GLN'), 48.173 -"GLY": ('G', 'p', None, None, 'GLYCINE'), 48.174 -"GLZ": ('g', 'p', 'M', None, 'AMINO-ACETALDEHYDE'), 48.175 -"CTG": ('t', 'd', 'M', None, '"(5R,6S)-5,6-DIHYDRO-5,6-DIHYDROXYTHYMIDINE-5\'-MONOPHOSPHATE"'), 48.176 -"T38": ('n', 'r', None, None, "2'-O-ETHOXYMETHYLENE THYMIDINE 5'-MONOPHOSPHATE"), 48.177 -"AKL": ('d', 'p', 'M', None, '"3-AMINO-5-CHLORO-4-OXOPENTANOIC'), 48.178 -"T36": ('t', 'd', 'M', None, "SPLIT LINKAGE THYMIDINE 5'-MONOPHOSPHATE"), 48.179 -"T37": ('t', 'd', 'M', None, "3'-AMINO-2'DEOXYTHYMIDINE 5'-MONOPHOSPHATE"), 48.180 -"4TA": ('n', 'r', None, None, '"P1-(5\'-ADENOSYL)P4-(5\'-(2\'-DEOXY-THYMIDYL))TETRAPHOSPHATE"'), 48.181 -"CTH": ('t', 'p', 'M', None, '4-CHLOROTHREONINE'), 48.182 -"GLU": ('E', 'p', None, None, '"GLUTAMIC'), 48.183 -"RCY": ('c', 'p', 'M', None, '"S-[(3S,3\'R)-1\'-HYDROXY-2\',2\',5\',5\'-TETRAMETHYL-2,5-DIOXO-1,3\'-BIPYRROLIDIN-3-YL]-L-CYSTEINE"'), 48.184 -"MDH": ('x', 'p', 
None, None, 'N-METHYLDEHYDROBUTYRINE'), 48.185 -"MDO": ('x', 'p', None, None, '"{2-[(1S)-1-AMINOETHYL]-5-HYDROXY-4-METHYL-1H-IMIDAZOL-1-YL}ACETIC'), 48.186 -"8OG": ('g', 'd', 'M', None, '"8-OXO-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.187 -"HCS": ('x', 'p', None, None, '"2-AMINO-4-MERCAPTO-BUTYRIC'), 48.188 -"UD5": ('u', 'r', 'M', None, '5-FLUOROURIDINE'), 48.189 -"O2C": ('n', 'd', None, None, '"3\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.190 -"5FU": ('u', 'r', 'M', None, '"5-FLUORO-URIDINE-5\'-MONOPHOSPHATE"'), 48.191 -"SDG": ('g', 'd', 'M', None, '2-AMINO-9-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-9H-PURINE-6-SELENOL'), 48.192 -"MDR": ('n', 'd', None, None, '9-(2-DEOXY-BETA-D-RIBOFURANOSYL)-6-METHYLPURINE'), 48.193 -"BSE": ('s', 'p', 'M', None, 'BETA-3-SERINE'), 48.194 -"4IN": ('x', 'p', None, None, '4-AMINO-L-TRYPTOPHAN'), 48.195 -"FCL": ('f', 'p', 'M', None, '3-CHLORO-L-PHENYLALANINE'), 48.196 -"1SC": ('c', 'r', 'M', None, '"5\'-O-THIOPHOSPHONOCYTIDINE"'), 48.197 -"R7A": ('c', 'p', 'M', None, '3-S-[(4-BROMO-2,2,5,5-TETRAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROL-3-YL)METHYL]SULFANYL-L-CYSTEINE'), 48.198 -"6IA": ('a', 'r', 'M', None, '"N6-ISOPENTENYL-ADENOSINE-5\'-MONOPHOSPHATE"'), 48.199 -"XTS": ('g', 'r', 'M', None, '9-[(2R,3R,4S,5R)-3,4-DIHYDROXY-5-(HYDROXYMETHYL)OXOLAN-2-YL]-3H-PURINE-2,6-DIONE'), 48.200 -"DFT": ('n', 'd', None, None, '"1-[2-DEOXYRIBOFURANOSYL]-2,4-DIFLUORO-5-METHYL-BENZENE-5\'MONOPHOSPHATE"'), 48.201 -"FMU": ('n', 'r', None, None, '"5-FLUORO-5-METHYLURIDINE-5\'-MONOPHOSPHATE"'), 48.202 -"XTY": ('n', 'd', None, None, '(1R)-1,4-ANHYDRO-2-DEOXY-1-(6-METHYL-2,4-DIOXO-1,2,3,4-TETRAHYDROQUINAZOLIN-8-YL)-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL'), 48.203 -"MVA": ('v', 'p', 'M', None, 'N-METHYLVALINE'), 48.204 -"LTA": ('x', 'p', None, None, '"4,8-DIAMINO-3-HYDROXY-OCTANOIC'), 48.205 -"01W": ('x', 'p', None, None, '(2S)-2-AMMONIO-4-[(2,4-DINITROPHENYL)AMINO]BUTANOATE'), 48.206 -"DFI": ('x', 'p', None, None, '2,2-DIFLUOROSTATINE'), 48.207 -"YG": 
('g', 'r', 'M', None, 'WYBUTOSINE'), 48.208 -"FMG": ('g', 'd', 'M', None, '2-AMINO-9-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)-7-METHYL-6-OXO-6,9-DIHYDRO-1H-PURIN-7-IUM'), 48.209 -"DFG": ('g', 'd', 'M', None, "2'-DEOXY-L-RIBO-FURANOSYL GUANINE-5'-MONOPHOSPHATE"), 48.210 -"XTL": ('t', 'd', 'M', None, '"[(1S,4R,6R)-6-HYDROXY-4-(THYMIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL'), 48.211 -"DFC": ('c', 'd', 'M', None, "2'-DEOXY-L-RIBO-FURANOSYL CYTOSINE-5'-MONOPHOSPHATE"), 48.212 -"XTH": ('t', 'd', 'M', None, '1-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-5-METHYLPYRIMIDINE-2,4(1H,3H)-DIONE'), 48.213 -"NTY": ('y', 'p', 'M', None, '"BETA'), 48.214 -"EDC": ('g', 'd', 'M', None, '"N3,N4-ETHENO-2\'-DEOXYCYTIDINE-5\'-MONOPHOSPHATE"'), 48.215 -"T49": ('t', 'd', 'M', None, "S4'-2'DEOXYTHYMIDINE 5'-MONOPHOSPHATE"), 48.216 -"EDA": ('a', 'd', 'M', None, '"3-[2-DEOXY-RIBOFURANOSYL]-3H-1,3,4,5A,8-PENTAAZA-AS-INDACENE-5\'-MONOPHOSPHATE"'), 48.217 -"O12": ('x', 'p', None, None, 'N~5~-DODECANOYL-L-ORNITHINE'), 48.218 -"MMT": ('t', 'd', 'M', None, '"5\'-O-(DIMETHYLAMINO)-THYMIDINE"'), 48.219 -"T41": ('n', 'r', None, None, "1-(2-O-METHYL-BETA-D-ARABINOFURANOSYL)THYMIDINE 5'-MONOPHOSPHATE"), 48.220 -"LMS": ('n', 'r', None, None, '"[(2R,3S,4R,5R)-5-(6-AMINO-9H-PURIN-9-YL)-3,4-DIHYDROXYTETRAHYDRO-2-FURANYL]METHYL'), 48.221 -"0AP": ('c', 'd', 'M', None, "2'-deoxycytidine 3',5'-bis(dihydrogen phosphate)"), 48.222 -"ORQ": ('r', 'p', 'M', None, 'N~5~-ACETYL-L-ORNITHINE'), 48.223 -"ORN": ('a', 'p', 'M', None, 'ORNITHINE'), 48.224 -"ALM": ('a', 'p', 'M', None, '1-METHYL-ALANINAL'), 48.225 -"VB1": ('k', 'p', 'M', None, 'N^6^-[(1R)-2-{[(1S)-1-CARBOXYPROPYL]AMINO}-2-OXO-1-(SULFANYLMETHYL)ETHYL]-6-OXO-L-LYSINE'), 48.226 -"PUY": ('n', 'r', None, None, 'PUROMYCIN'), 48.227 -"SBD": ('s', 'p', 'M', None, '"D-NAPHTHYL-1-ACETAMIDO'), 48.228 -"LPS": ('s', 'p', 'M', None, 'O-{HYDROXY[((2R)-2-HYDROXY-3-{[(1S)-1-HYDROXYPENTADECYL]OXY}PROPYL)OXY]PHOSPHORYL}-L-SERINE'), 48.229 -"SC": ('c', 'd', 
'M', None, '"2-DEOXY-CYTIDINE-5\'-THIOPHOSPHORATE"'), 48.230 -"ALT": ('a', 'p', 'M', None, 'THIOALANINE'), 48.231 -"GL3": ('g', 'p', 'M', None, 'THIOGLYCIN'), 48.232 -"LME": ('e', 'p', 'M', None, '"(3R)-3-METHYL-L-GLUTAMIC'), 48.233 -"SBL": ('s', 'p', 'M', None, '"L-NAPHTHYL-1-ACETAMIDO'), 48.234 -"ALQ": ('x', 'p', None, None, '"2-METHYL-PROPIONIC'), 48.235 -"CAB": ('a', 'p', 'M', None, '4-CARBOXY-4-AMINOBUTANAL'), 48.236 -"GPL": ('k', 'p', 'M', None, "LYSINE GUANOSINE-5'-MONOPHOSPHATE"), 48.237 -"CAY": ('c', 'p', 'M', None, 'CARBOXYMETHYLENECYSTEINE'), 48.238 -"DPR": ('p', 'p', 'M', None, 'D-PROLINE'), 48.239 -"DPQ": ('y', 'p', 'M', None, '"(S)-2-AMINO-3-(4-HYDROXY-3-OXOCYCLOHEXA-1,4-DIENYL)PROPANOIC'), 48.240 -"DPP": ('a', 'p', 'M', None, '"DIAMMINOPROPANOIC'), 48.241 -"CAS": ('c', 'p', 'M', None, 'S-(DIMETHYLARSENIC)CYSTEINE'), 48.242 -"NC1": ('s', 'p', 'M', None, '"NITROCEFIN'), 48.243 -"IYR": ('y', 'p', 'M', None, '3-IODO-TYROSINE'), 48.244 -"CAV": ('x', 'p', None, None, '"5-AMINO-6-CYCLOHEXYL-3,4-DIHYDROXY-2-ISOPROPYL-HEXANOIC'), 48.245 -"DPY": ('n', 'd', None, None, "2-DEOXYRIBOFURANOSYL-PYRIDINE-2,6-DICARBOXYLIC ACID-5'-MONOPHOSPHATE"), 48.246 -"5AA": ('a', 'd', 'M', None, '"N6-DIMETHYL-3\'-AMINO-ADENOSINE-5\'-MONOPHOSPHATE"'), 48.247 -"DPB": ('t', 'd', 'M', None, '"(S)-1-[2\'-DEOXY-3\',5\'-O-(1-PHOSPHONO)BENZYLIDENE-B-D-THREO-PENTOFURANOSYL]THYMINE"'), 48.248 -"CAL": ('x', 'p', None, None, '"5-AMINO-6-CYCLOHEXYL-4-HYDROXY-2-ISOBUTYL-HEXANOIC'), 48.249 -"DPN": ('f', 'p', 'M', None, 'D-PHENYLALANINE'), 48.250 -"BAL": ('a', 'p', 'M', None, 'BETA-ALANINE'), 48.251 -"DPL": ('p', 'p', 'M', None, '4-OXOPROLINE'), 48.252 -"CAF": ('c', 'p', 'M', None, 'S-DIMETHYLARSINOYL-CYSTEINE'), 48.253 -"DPH": ('f', 'p', 'M', None, 'DEAMINO-METHYL-PHENYLALANINE'), 48.254 -"HIP": ('h', 'p', 'M', None, 'ND1-PHOSPHONOHISTIDINE'), 48.255 -"LG": ('g', 'r', 'M', None, '"L-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.256 -"LA2": ('k', 'p', 'M', None, 
'N~6~-[(6R)-6,8-DISULFANYLOCTANOYL]-L-LYSINE'), 48.257 -"KOR": ('m', 'p', 'M', None, 'L-HOMOCYSTEINE-S-N-S-L-CYSTEINE'), 48.258 -"LC": ('c', 'r', 'M', None, '"L-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.259 -"D4M": ('t', 'd', 'M', None, '"[(5R)-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)-2,5-DIHYDROFURAN-2-YL]METHYL'), 48.260 -"5MD": ('n', 'd', None, None, '"5-METHYL-2\'-DEOXYPSEUDOURIDINE"'), 48.261 -"PGN": ('g', 'd', 'M', None, '"2\'-DEOXYGUANOSINE-3\',5\'-DIPHOSPHATE"'), 48.262 -"NHL": ('e', 'p', 'M', None, '"(4S)-4-(2-NAPHTHYLMETHYL)-D-GLUTAMIC'), 48.263 -"PGL": ('x', 'p', None, None, '"AMINOMETHYLENEPHOSPHINIC'), 48.264 -"TA3": ('t', 'd', 'M', None, '(4S,5R)-3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-METHYL-1,3-DIAZABICYCLO[2.2.0]HEXAN-2-ONE'), 48.265 -"XCP": ('x', 'p', None, None, '"(1S,2S)-2-AMINOCYCLOPENTANECARBOXYLIC'), 48.266 -"DYS": ('c', 'p', 'M', None, 'S-[5-(2-AMINOETHYL)-2,3-DIHYDROXYPHENYL]-L-CYSTEINE'), 48.267 -"HIA": ('h', 'p', 'M', None, '"L-HISTIDINE'), 48.268 -"HIC": ('h', 'p', 'M', None, '4-METHYL-HISTIDINE'), 48.269 -"PGP": ('g', 'r', 'M', None, '"GUANOSINE-3\',5\'-DIPHOSPHATE"'), 48.270 -"AB7": ('x', 'p', None, None, '"ALPHA-AMINOBUTYRIC'), 48.271 -"IIL": ('i', 'p', 'M', None, 'ISO-ISOLEUCINE'), 48.272 -"DYG": ('x', 'p', None, None, '"(3S)-3-AMINO-3-[(4Z)-1-(CARBOXYMETHYL)-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-IMIDAZOL-2-YL]PROPANOIC'), 48.273 -"PGY": ('g', 'p', 'M', None, 'PHENYLGLYCINE'), 48.274 -"A": ('A', 'r', None, None, '"ADENOSINE-5\'-MONOPHOSPHATE"'), 48.275 -"23F": ('f', 'p', 'M', None, '"(2Z)-2-AMINO-3-PHENYLACRYLIC'), 48.276 -"143": ('c', 'p', 'M', None, 'S-2,3-DIHYDRO-5-GLYCIN-2-YL-ISOXAZOL-3-YL-CYSTEINE'), 48.277 -"HIQ": ('h', 'p', 'M', None, '1-[1,2-DIHYDROXY-1-(HYDROXYMETHYL)ETHYL]-L-HISTIDINE'), 48.278 -"THR": ('T', 'p', None, None, 'THREONINE'), 48.279 -"PG7": ('g', 'd', 'M', None, '"((2R,3R,5R)-5-(2-AMINO-6-HYDROXY-9H-PURIN-9-YL)-3-HYDROXY-TETRAHYDROFURAN-2-YL)METHYL'), 48.280 -"ABR": ('a', 'd', 'M', None, 
'"(R)-(N-PHENYL-2-HYDROXY-ETHYL)-2\'-DEOXY-ADENOSINE-5\'-MONOPHOSPHATE"'), 48.281 -"HIS": ('H', 'p', None, None, 'HISTIDINE'), 48.282 -"ABT": ('n', 'd', None, None, "3'-AZIDO-3'-DEOXY-THYMIDINE-5'-ALPHA BORANO TRIPHOSPHATE"), 48.283 -"PG1": ('s', 'p', 'M', None, '"PENICILLIN'), 48.284 -"2LU": ('l', 'p', 'M', None, '"2-AMINO-4-METHYL-PENTANYL'), 48.285 -"NZH": ('h', 'p', 'M', None, '(2S)-2-AMINO-3-[1-(1H-TETRAAZOL-5-YL)-1H-IMIDAZOL-4-YL]PROPANAL'), 48.286 -"P5P": ('a', 'r', 'M', None, "PURINE RIBOSIDE-5'-MONOPHOSPHATE"), 48.287 -"STY": ('y', 'p', 'M', None, '"TYROSINE-O-SULPHONIC'), 48.288 -"PG9": ('g', 'p', 'M', None, 'D-PHENYLGLYCINE'), 48.289 -"NCX": ('n', 'd', None, None, '1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-NITRO-1H-INDOLE-3-CARBOXAMIDE'), 48.290 -"NCY": ('x', 'p', None, None, 'N-METHYLCYSTEINE'), 48.291 -"CFL": ('c', 'd', 'M', None, '4-AMINO-1-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)PYRIMIDIN-2(1H)-ONE'), 48.292 -"3MD": ('d', 'p', 'M', None, '"2S,3S-3-METHYLASPARTIC'), 48.293 -"1AP": ('a', 'd', 'M', None, '"2,6-DIAMINOPURINE'), 48.294 -"TFE": ('t', 'd', 'M', None, "2'-O-[2-(TRIFLUORO)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"), 48.295 -"D3T": ('t', 'd', 'M', None, '"2\',3\'-DIDEOXY-THYMIDINE-5\'-TRIPHOSPHATE"'), 48.296 -"TFA": ('x', 'p', None, None, '"TRIFLUOROACETYL'), 48.297 -"BHD": ('d', 'p', 'M', None, '"BETA-HYDROXYASPARTIC'), 48.298 -"ONL": ('x', 'p', None, None, '5-OXO-L-NORLEUCINE'), 48.299 -"CFY": ('x', 'p', None, None, '"[(2S)-2-{(2R)-2-[(1S)-1-AMINO-2-PHENYLETHYL]-2-HYDROXY-2,5-DIHYDRO-1,3-THIAZOL-4-YL}-4-(4-HYDROXYBENZYL)-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.300 -"QLG": ('x', 'p', None, None, '"[(4Z)-2-[(1Z)-4-AMINO-4-OXOBUTANIMIDOYL]-4-(2-METHYLPROPYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.301 -"NCB": ('a', 'p', 'M', None, 'N-CARBAMOYL-ALANINE'), 48.302 -"TFQ": ('f', 'p', 'M', None, '4-(2,2,2-TRIFLUOROETHYL)-L-PHENYLALANINE'), 48.303 -"XXY": ('x', 'p', None, None, 
'2-[(1R,2R)-1-AMINO-2-HYDROXYPROPYL]-1-(CARBOXYMETHYL)-4-(1H-IMIDAZOL-5-YLMETHYL)-1H-IMIDAZOL-5-OLATE'), 48.304 -"23S": ('x', 'p', None, None, '"(S)-2-AMINO-3-(6H-SELENOLO[2,3-B]-PYRROL-4-YL)-PROPIONIC'), 48.305 -"0AU": ('n', 'd', None, None, "5-iodouridine 5'-(dihydrogen phosphate)"), 48.306 -"GHP": ('g', 'p', 'M', None, '4-HYDROXYPHENYLGLYCINE'), 48.307 -"MIA": ('a', 'r', 'M', None, '"2-METHYLTHIO-N6-ISOPENTENYL-ADENOSINE-5\'-MONOPHOSPHATE"'), 48.308 -"AP7": ('a', 'd', 'M', None, "N1-PROTONATED ADENOSINE-5'-MONOPHOSPHATE"), 48.309 -"OAD": ('n', 'r', None, None, "2'-O-ACETYL ADENOSINE-5-DIPHOSPHORIBOSE"), 48.310 -"UDP": ('n', 'r', None, None, '"URIDINE-5\'-DIPHOSPHATE"'), 48.311 -"GHG": ('q', 'p', 'M', None, 'GAMMA-HYDROXY-GLUTAMINE'), 48.312 -"ZFB": ('x', 'p', None, None, '(3R)-3-{[(BENZYLOXY)CARBONYL]AMINO}-2-OXO-4-PHENYLBUTANE-1-DIAZONIUM'), 48.313 -"C38": ('c', 'd', 'M', None, '"5-IODO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.314 -"HAQ": ('x', 'p', None, None, '"5-AMINO-4-OXO-1,2,4,5,6,7-HEXAHYDRO-AZEPINO[3,2,1-HI]INDOLE-2-CARBOXYLIC'), 48.315 -"C34": ('c', 'd', 'M', None, '"N4-METHYL-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.316 -"C37": ('c', 'd', 'M', None, '"5-FLUORO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.317 -"ONE": ('u', 'r', 'M', None, '"1-(BETA-D-RIBOFURANOSYL)-PYRIDIN-4-ONE-5\'-PHOSPHATE"'), 48.318 -"C31": ('c', 'r', 'M', None, "2'-O-3-AMINOPROPYL CYTIDINE-5'-MONOPHOSPHATE"), 48.319 -"HAR": ('r', 'p', 'M', None, 'N-OMEGA-HYDROXY-L-ARGININE'), 48.320 -"TY2": ('y', 'p', 'M', None, '3-AMINO-L-TYROSINE'), 48.321 -"TY3": ('y', 'p', 'M', None, '3-HYDROXY-L-TYROSINE'), 48.322 -"RT": ('n', 'r', None, None, '"RIBOSYLTHYMINE-5\'-MONOPHOSPHATE"'), 48.323 -"4PE": ('c', 'd', 'M', None, '3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-3H-PYRROLO[2,3-D]PYRIMIDIN-2-OL'), 48.324 -"4PD": ('c', 'd', 'M', None, '3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-1,3-DIHYDRO-2H-PYRROLO[2,3-D]PYRIMIDIN-2-ONE'), 48.325 -"4PC": ('c', 
'd', 'M', None, '"3-(2\'-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-6-METHYL-3,7-DIHYDRO-2H-PYRROLO[2,3-D]PYRIMIDIN-2-ONE"'), 48.326 -"SAC": ('s', 'p', 'M', None, 'N-ACETYL-SERINE'), 48.327 -"PRO": ('P', 'p', None, None, 'PROLINE'), 48.328 -"PRN": ('a', 'd', 'M', None, "PURINE 2'-DEOXYRIBO-5'-MONOPHOSPHATE"), 48.329 -"6CL": ('k', 'p', 'M', None, '6-CARBOXYLYSINE'), 48.330 -"4PH": ('f', 'p', 'M', None, '4-METHYL-L-PHENYLALANINE'), 48.331 -"FGL": ('g', 'p', 'M', None, '"2-AMINOPROPANEDIOIC'), 48.332 -"6CT": ('t', 'd', 'M', None, '"PHOSPHORIC'), 48.333 -"IAS": ('d', 'p', 'M', None, '"BETA-ASPARTYL'), 48.334 -"PRS": ('p', 'p', 'M', None, 'THIOPROLINE'), 48.335 -"UR3": ('u', 'r', 'M', None, '"3-METHYLURIDINE-5\'-MONOPHOSHATE"'), 48.336 -"SAR": ('g', 'p', 'M', None, 'SARCOSINE'), 48.337 -"TCY": ('a', 'd', 'M', None, '"(2R,3AS,4AR,5AR,5BS)-2-(6-AMINO-9H-PURIN-9-YL)-3A-HYDROXYHEXAHYDROCYCLOPROPA[4,5]CYCLOPENTA[1,2-B]FURAN-5A(4H)-YL'), 48.338 -"4F3": ('x', 'p', None, None, '"[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(4-FLUORO-1H-INDOL-3-YLMETHYL)-5-HYDROXY-IMIDAZOL-1-YL]-ACETIC'), 48.339 -"TPG": ('g', 'r', 'M', None, '"2,2,7-TRIMETHYL-GUANOSINE-5\'-TRIPHOSPHATE-5\'-GUANOSINE"'), 48.340 -"LAL": ('a', 'p', 'M', None, 'N,N-DIMETHYL-L-ALANINE'), 48.341 -"TPC": ('c', 'd', 'M', None, "5'-THIO-2'-DEOXY-CYTOSINE PHOSPHONIC ACID"), 48.342 -"PPU": ('a', 'r', 'M', None, '"PUROMYCIN-5\'-MONOPHOSPHATE"'), 48.343 -"CHF": ('x', 'p', None, None, 'CYCLOHEXYLFLUOROSTATONE'), 48.344 -"BFD": ('d', 'p', 'M', None, '"ASPARTATE'), 48.345 -"TPO": ('t', 'p', 'M', None, 'PHOSPHOTHREONINE'), 48.346 -"H5M": ('p', 'p', 'M', None, 'TRANS-3-HYDROXY-5-METHYLPROLINE'), 48.347 -"AYG": ('x', 'p', None, None, '"[(4E)-2-[(1S)-1-AMINOETHYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.348 -"TPL": ('w', 'p', 'M', None, 'TRYPTOPHANOL'), 48.349 -"AYA": ('a', 'p', 'M', None, 'N-ACETYLALANINE'), 48.350 -"6MC": ('a', 'r', 'M', None, '"CIS-N6-METHYL-DEOXY-ADENOSINE-5\'-MONOPHOSPHATE"'), 
48.351 -"6MA": ('a', 'r', 'M', None, '"N6-METHYL-DEOXY-ADENOSINE-5\'-MONOPHOSPHATE"'), 48.352 -"GSR": ('g', 'd', 'M', None, "2'-DEOXY-N2-(R)STYRENE OXIDE GUANOSINE MONOPHOSPHATE"), 48.353 -"EYS": ('x', 'p', None, None, 'S-SELANYLCYSTEINAL'), 48.354 -"LPL": ('x', 'p', None, None, 'LEU-HYDROXYETHYLENE-LEU'), 48.355 -"6MZ": ('n', 'r', None, None, '"N6-METHYLADENOSINE-5\'-MONOPHOSPHATE"'), 48.356 -"PR3": ('c', 'p', 'M', None, 'S,S-PROPYLTHIOCYSTEINE'), 48.357 -"2ST": ('t', 'd', 'M', None, "5-METHYL-2'-SE-METHYL-2'-SELENOURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 48.358 -"0NC": ('a', 'p', 'M', None, 'N-METHYL-L-ALANINAMIDE'), 48.359 -"MRG": ('g', 'd', 'M', None, '"N2-(3-MERCAPTOPROPYL)-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'), 48.360 -"KYN": ('a', 'p', 'M', None, 'KYNURENINE'), 48.361 -"G2S": ('g', 'd', 'M', None, '"GUANOSINE-5\'-DITHIOPHOSPHORATE"'), 48.362 -"PYX": ('c', 'p', 'M', None, 'S-[S-THIOPYRIDOXAMINYL]CYSTEINE'), 48.363 -"PYY": ('n', 'r', None, None, '"D-RIBOFURANOSYL-BENZENE-5\'-MONOPHOSPHATE"'), 48.364 -"TYN": ('y', 'p', 'M', None, 'AMINOBENZOFURAZAN-O-TYROSINE'), 48.365 -"TYO": ('y', 'p', 'M', None, '"(4Z,6E)-2-AMINO-7-HYDROPEROXY-4-[(E)-2-HYDROXYVINYL]HEPTA-4,6-DIENOIC'), 48.366 -"KAG": ('g', 'r', 'M', None, "2'-DEOXY-N-[(1S)-1-METHYL-3-OXOPROPYL]GUANOSINE 5'-PHOSPHATE"), 48.367 -"IPG": ('g', 'p', 'M', None, '"N-ISOPROPYL'), 48.368 -"GH3": ('g', 'r', 'M', None, '"3\'-DEOXY-GUANOSINE-5\'-TRIPHOSPHATE"'), 48.369 -"APP": ('x', 'p', None, None, '1-ACETYL-2-CARBOXYPIPERIDINE'), 48.370 -"IPN": ('n', 'd', None, None, '2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-5-IODOURACIL'), 48.371 -"TYB": ('y', 'p', 'M', None, 'TYROSINAL'), 48.372 -"FAG": ('n', 'd', None, None, "[1',2'-DIDEOXY[2-AMINO-5-([9-HYDROXY-AFLATOXINB2-8-YL]-FORMYL-AMINO)-6-OXO-1,6-IHYDRO-PYRIMIDIN-4-YLAMINO]-RIBOFURANOSE]-5-MONOPHOSPHATE GROUP"), 48.373 -"DBY": ('y', 'p', 'M', None, '"3,5'), 48.374 -"APK": ('k', 'p', 'M', None, '"5\'-O-[(S)-{[(5S)-5-AMINO-6-OXOHEXYL]AMINO}(HYDROXY)PHOSPHORYL]ADENOSINE"'), 48.375 
-"APH": ('a', 'p', 'M', None, 'P-AMIDINOPHENYL-3-ALANINE'), 48.376 -"API": ('k', 'p', 'M', None, '"2,6-DIAMINOPIMELIC'), 48.377 -"TYX": ('x', 'p', None, None, 'S-(2-ANILINO-2-OXOETHYL)-L-CYSTEINE'), 48.378 -"APO": ('x', 'p', None, None, '"D-2-AMINO-3-PHOSPHONO-PROPIONIC'), 48.379 -"APM": ('x', 'p', None, None, 'M-AMIDINOPHENYL-3-ALANINE'), 48.380 -"TYT": ('y', 'p', 'M', None, '"TYROSINE'), 48.381 -"TYU": ('n', 'r', None, None, 'TETRAHYDROURIDINE'), 48.382 -"DBS": ('s', 'p', 'M', None, '"2-(2,3-DIHYDROXY-BENZOYLAMINO)-3-HYDROXY-PROPIONIC'), 48.383 -"DBU": ('a', 'p', 'M', None, '"(2E)-2-AMINOBUT-2-ENOIC'), 48.384 -"TYQ": ('y', 'p', 'M', None, '3-AMINO-6-HYDROXY-TYROSINE'), 48.385 -"TYR": ('Y', 'p', None, None, 'TYROSINE'), 48.386 -"APE": ('x', 'p', None, None, '"(1-AMINO-2-PHENYL-ETHYL)-CARBAMIC'), 48.387 -"BT5": ('n', 'r', None, None, 'BIOTINYL-5-AMP'), 48.388 -"KST": ('k', 'p', 'M', None, 'N~6~-(5-CARBOXY-3-THIENYL)-L-LYSINE'), 48.389 -"2AD": ('x', 'p', None, None, '"2\'-AMINO-2\'-DEOXYADENOSINE"'), 48.390 -"ARG": ('R', 'p', None, None, 'ARGININE'), 48.391 -"HDP": ('n', 'd', None, None, '"[(1S,6S)-6-HYDROXY-4-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)CYCLOHEX-2-EN-1-YL]METHYL'), 48.392 -"CZ2": ('c', 'p', 'M', None, 'S-(DIHYDROXYARSINO)CYSTEINE'), 48.393 -"2AO": ('x', 'p', None, None, '(2S)-2-AMINOHEXAN-1-OL'), 48.394 -"G33": ('g', 'd', 'M', None, "8-METHYL-2'-DEOXYGUANOSINE 3'-MONOPHOSPHATE"), 48.395 -"AHO": ('a', 'p', 'M', None, 'N-ACETYL-N-HYDROXY-L-ORNITHINE'), 48.396 -"P2U": ('n', 'd', None, None, '"2\'-DEOXY-PSEUDOURIDINE-5\'MONOPHOSPHATE"'), 48.397 -"P2T": ('t', 'd', 'M', None, "2'-O-PROPYL THYMIDINE-5-MONOPHOSPHATE"), 48.398 -"MG1": ('g', 'd', 'M', None, "2'-DEOXY-1-METHYLGUANOSINE 5'-(DIHYDROGEN PHOSPHATE)"), 48.399 -"G32": ('g', 'd', 'M', None, '"O6-METHYL-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.400 -"2AR": ('a', 'd', 'M', None, '"2\'-DEOXYARISTEROMYCIN-5\'-PHOSPHATE"'), 48.401 -"2AS": ('x', 'p', None, None, 
'"(2S,3S)-3-AMINO-2-METHYL-4-OXOBUTANOIC'), 48.402 -"G46": ('g', 'r', 'M', None, '"2\'-DEOXY-GUANOSINE-5\'-MONOTHIOPHOSPHATE"'), 48.403 -"G47": ('g', 'd', 'M', None, '"N2-ETHANETHIOL-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.404 -"G48": ('g', 'r', 'M', None, '"2\'-O-METHYOXYETHYL-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.405 -"G49": ('g', 'd', 'M', None, '"N2-METHYL-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.406 -"OCS": ('c', 'p', 'M', None, '"CYSTEINESULFONIC'), 48.407 -"P2Y": ('p', 'p', 'M', None, '(2S)-PYRROLIDIN-2-YLMETHYLAMINE'), 48.408 -"PHA": ('f', 'p', 'M', None, 'PHENYLALANINAL'), 48.409 -"NLQ": ('q', 'p', 'M', None, 'N~2~-ACETYL-L-GLUTAMINE'), 48.410 -"NLP": ('l', 'p', 'M', None, '"(1-AMINO-PENTYL)-PHOSPHONIC'), 48.411 -"MLL": ('l', 'p', 'M', None, '"METHYL'), 48.412 -"CEA": ('c', 'p', 'M', None, 'S-HYDROXY-CYSTEINE'), 48.413 -"HV5": ('a', 'p', 'M', None, 'TERT-BUTYLALANINE'), 48.414 -"HMR": ('r', 'p', 'M', None, 'BETA-HOMOARGININE'), 48.415 -"HRG": ('r', 'p', 'M', None, 'L-HOMOARGININE'), 48.416 -"CSP": ('c', 'p', 'M', None, 'S-PHOSPHOCYSTEINE'), 48.417 -"FA2": ('a', 'd', 'M', None, '"5-(6-AMINO-9H-PURIN-9-YL)-4-HYDROXYTETRAHYDROFURAN-3-YL'), 48.418 -"BMP": ('n', 'r', None, None, '"6-HYDROXYURIDINE-5\'-PHOSPHATE"'), 48.419 -"NLE": ('l', 'p', 'M', None, 'NORLEUCINE'), 48.420 -"RTP": ('n', 'r', None, None, '"RIBAVIRIN'), 48.421 -"BMT": ('t', 'p', 'M', None, '4-METHYL-4-[(E)-2-BUTENYL]-4,N-METHYL-THREONINE'), 48.422 -"G38": ('g', 'd', 'M', None, '"3\'-AMINO-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.423 -"CS4": ('c', 'p', 'M', None, 'S-[3-(3,4-DICHLOROPHENYL)-3-OXOPROPYL]-L-CYSTEINE'), 48.424 -"2FI": ('n', 'd', None, None, '"2\'-FLUORO-2\'-DEOXYINOSINE"'), 48.425 -"TTS": ('y', 'p', 'M', None, '3-((3E)-4-HYDROXY-3-{[2-(4-HYDROXYPHENYL)ETHYL]IMINO}-6-OXOCYCLOHEXA-1,4-DIEN-1-YL)ALANINE'), 48.426 -"CS1": ('c', 'p', 'M', None, 'S-(2-ANILINYL-SULFANYL)-CYSTEINE'), 48.427 -"CS0": ('c', 'p', 'M', None, 'S-(2-HYDROXYETHYL)-L-CYSTEINE'), 48.428 -"NLO": ('l', 'p', 
'M', None, 'O-METHYL-L-NORLEUCINE'), 48.429 -"NLN": ('l', 'p', 'M', None, '"NORLEUCINE'), 48.430 -"SHP": ('g', 'p', 'M', None, '(4-HYDROXYMALTOSEPHENYL)GLYCINE'), 48.431 -"CSL": ('c', 'd', 'M', None, '"(D)-2\'-METHYLSELENYL-2\'-DEOXYCYTIDINE-5\'-PHOSPHATE"'), 48.432 -"SHR": ('k', 'p', 'M', None, '"N-(5-AMINO-5-CARBOXYPENTYL)GLUTAMIC'), 48.433 -"OXX": ('d', 'p', 'M', None, '"OXALYL-ASPARTYL'), 48.434 -"B3Y": ('y', 'p', 'M', None, '"(3S)-3-AMINO-4-(4-HYDROXYPHENYL)BUTANOIC'), 48.435 -"DLE": ('l', 'p', 'M', None, 'D-LEUCINE'), 48.436 -"PYA": ('a', 'p', 'M', None, '3-(1,10-PHENANTHROL-2-YL)-L-ALANINE'), 48.437 -"CSE": ('c', 'p', 'M', None, 'SELENOCYSTEINE'), 48.438 -"5FA": ('a', 'r', 'M', None, '"ADENOSINE-5\'-PENTAPHOSPHATE"'), 48.439 -"GCK": ('c', 'd', 'M', None, "PHOSPHORIC ACID 9-(2-GUANIDINOETHOXY-3-(2-DEOXY-BETA-D-ERYTHROPENTOFURANOSYL))-3H-PYRIMIDO-[5,4-B][1,4]-BENZOOXAZIN-2-ONE]-5'-ESTER"), 48.440 -"5FC": ('c', 'd', 'M', None, '"5-FORMYL-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.441 -"GGL": ('e', 'p', 'M', None, '"GAMMA-GLUTAMIC'), 48.442 -"B3Q": ('x', 'p', None, None, '"(3S)-3,6-DIAMINO-6-OXOHEXANOIC'), 48.443 -"B3S": ('s', 'p', 'M', None, '"(3R)-3-AMINO-4-HYDROXYBUTANOIC'), 48.444 -"B3L": ('x', 'p', None, None, '"(3S)-3-AMINO-5-METHYLHEXANOIC'), 48.445 -"A47": ('a', 'd', 'M', None, "N6-METHOXY ADENOSINE 5'-MONOPHOSPHATE"), 48.446 -"A44": ('a', 'r', 'M', None, "2'-O-METHYOXYETHYL-ADENOSINE 5'-MONOPHOSPHATE"), 48.447 -"5MU": ('u', 'r', 'M', None, "5-METHYLURIDINE 5'-MONOPHOSPHATE"), 48.448 -"CSY": ('x', 'p', None, None, '"[4-(4-HYDROXY-BENZYL)-2-(2-HYDROXY-1-METHYL-ETHYL)-5-OXO-IMIDAZOLIDIN-1-YL]-ACETIC'), 48.449 -"A43": ('a', 'd', 'M', None, "3'-AMINO DEOXYADENOSINE 5'-MONOPHOSPHATE"), 48.450 -"A40": ('a', 'd', 'M', None, "N2-METHYL 2'-DEOXYADENOSINE 5'-MONOPHOSPHATE"), 48.451 -"B3K": ('k', 'p', 'M', None, '"(3S)-3,7-DIAMINOHEPTANOIC'), 48.452 -"B3D": ('d', 'p', 'M', None, '"3-AMINOPENTANEDIOIC'), 48.453 -"B3E": ('e', 'p', 'M', None, 
'"(3S)-3-AMINOHEXANEDIOIC'), 48.454 -"DLY": ('k', 'p', 'M', None, 'D-LYSINE'), 48.455 -"MTU": ('a', 'r', 'M', None, '9-BETA-D-RIBOFURANOSYL-9H-PURIN-2-AMINE'), 48.456 -"B3A": ('a', 'p', 'M', None, '"(3S)-3-AMINOBUTANOIC'), 48.457 -"CSS": ('c', 'p', 'M', None, 'S-MERCAPTOCYSTEINE'), 48.458 -"CSR": ('c', 'p', 'M', None, 'S-ARSONOCYSTEINE'), 48.459 -"CZZ": ('c', 'p', 'M', None, 'THIARSAHYDROXY-CYSTEINE'), 48.460 -"N10": ('s', 'p', 'M', None, 'O-[(HEXYLAMINO)CARBONYL]-L-SERINE'), 48.461 -"MGG": ('r', 'p', 'M', None, '"2-(2-CARBOXY-ACETYLAMINO)-5-GUANIDINO-PENTANOIC'), 48.462 -"A35": ('a', 'd', 'M', None, "2-AMINO DEOXYADENOSINE 5'-MONOPHOSPHATE"), 48.463 -"AFG": ('g', 'd', 'M', None, '"N-(5\'-PHOSPHO-2\'-DEOXYGUANOSIN-8-YL)-2-AMINOFLUORENE"'), 48.464 -"BTR": ('w', 'p', 'M', None, '6-BROMO-TRYPTOPHAN'), 48.465 -"SSU": ('u', 'r', 'M', None, '"URIDINE-5\'-PHOSPHOROTHIOATE"'), 48.466 -"70U": ('u', 'r', 'M', None, '"5-(O-METHYLACETO)-2-THIO-2-DEOXY-URIDINE-5\'-MONOPHOSPHATE"'), 48.467 -"A34": ('a', 'd', 'M', None, "N6-METHYL DEOXYADENOSINE 5'-MONOPHOSPHATE"), 48.468 -"MGN": ('q', 'p', 'M', None, '2-METHYL-GLUTAMINE'), 48.469 -"XCL": ('c', 'd', 'M', None, '"[(1S,4R,6R)-6-HYDROXY-4-(CYTOSIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL'), 48.470 -"G4P": ('n', 'r', None, None, '"GUANOSINE-5\',3\'-TETRAPHOSPHATE"'), 48.471 -"XCS": ('n', 'd', None, None, '(1R)-1-(4-AMINO-6-METHYL-2-OXO-1,2-DIHYDROQUINAZOLIN-8-YL)-1,4-ANHYDRO-2-DEOXY-5-O-PHOSPHONO-D-ERYTHRO-PENTITOL'), 48.472 -"MGQ": ('a', 'r', 'M', None, '"7-BENZYL'), 48.473 -"ABS": ('a', 'd', 'M', None, '"(S)-(N-PHENYL-2-HYDROXY-ETHYL)-2\'-DEOXY-ADENOSINE-5\'-MONOPHOSPHATE"'), 48.474 -"BTA": ('l', 'p', 'M', None, '4-DEMETHYL-LEUCINE'), 48.475 -"MGV": ('g', 'r', 'M', None, '"P-FLUORO-7-BENZYL'), 48.476 -"XCT": ('c', 'd', 'M', None, '4-AMINO-1-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)PYRIMIDIN-2(1H)-ONE'), 48.477 -"TTQ": ('w', 'p', 'M', None, '6-AMINO-7-HYDROXY-L-TRYPTOPHAN'), 48.478 -"MGY": ('g', 'p', 'M', None, 
'N-METHYLGLYCINE'), 48.479 -"XCY": ('c', 'd', 'M', None, '"{5-[4-{[4-(AMINOMETHYL)BENZYL]AMINO}-2-OXOPYRIMIDIN-1(2H)-'), 48.480 -"NNH": ('r', 'p', 'M', None, 'NOR-N-OMEGA-HYDROXY-L-ARGININE'), 48.481 -"TBG": ('g', 'p', 'M', None, '"T-BUTYL'), 48.482 -"ATD": ('t', 'd', 'M', None, '"THYMIDINE-3\'-PHOSPHATE"'), 48.483 -"U8U": ('u', 'r', 'M', None, '"5-METHYLAMINOMETHYL-2-THIOURIDINE-5\'-MONOPHOSPHATE"'), 48.484 -"SNC": ('c', 'p', 'M', None, 'S-NITROSO-CYSTEINE'), 48.485 -"ALG": ('r', 'p', 'M', None, '"GUANIDINOBUTYRYL'), 48.486 -"M1G": ('g', 'd', 'M', None, '"3-(2-DEOXY-BETA-D-RIBOFURANOSYL)-PYRIDO[5,6-A]-PURINE-10-ONE-5\'-MONOPHOSPHATE"'), 48.487 -"ATL": ('t', 'd', 'M', None, '"[(1S,3R,4S,7R)-7-HYDROXY-3-(THYMIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'), 48.488 -"ATM": ('t', 'd', 'M', None, '"3\'-AZIDO-3\'-DEOXYTHYMIDINE-5\'-MONOPHOSPHATE"'), 48.489 -"HPC": ('f', 'p', 'M', None, '3-AMINO-4-PHENYL-BUTAN-2-ONE'), 48.490 -"TP1": ('t', 'd', 'M', None, '2-(METHYLAMINO)-ETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE'), 48.491 -"XUG": ('g', 'd', 'M', None, "2'-SE-METHYL-2'-SELENOGUANOSINE 5'-(DIHYDROGEN PHOSPHATE)"), 48.492 -"ISO": ('x', 'p', None, None, 'PARA-ISOPROPYLANILINE'), 48.493 -"3MM": ('x', 'p', None, None, '(1R)-1-CARBOXY-N,N,N-TRIMETHYL-3-(METHYLSULFANYL)PROPAN-1-AMINIUM'), 48.494 -"175": ('x', 'p', None, None, '3,5-DIHYDRO-5-METHYLIDENE-4H-IMIDAZOL-4-ON'), 48.495 -"2TY": ('y', 'p', 'M', None, '2-HYDROXY-5-{[(1E)-2-PHENYLETHYLIDENE]AMINO}-L-TYROSINE'), 48.496 -"CSB": ('c', 'p', 'M', None, '"CYS'), 48.497 -"IT1": ('k', 'p', 'M', None, '(E)-N~6~-({3-HYDROXY-2-METHYL-5-[(PHOSPHONOOXY)METHYL]PYRIDIN-4-YL}METHYLIDENE)-L-LYSINE'), 48.498 -"UN1": ('x', 'p', None, None, '"2-AMINOHEXANEDIOIC'), 48.499 -"UN2": ('x', 'p', None, None, '"2-AMINO-4,4-DIHYDROXYBUTANOIC'), 48.500 -"ASP": ('D', 'p', None, None, '"ASPARTIC'), 48.501 -"DTH": ('t', 'p', 'M', None, 'D-THREONINE'), 48.502 -"SER": ('S', 'p', None, None, 'SERINE'), 48.503 -"ASU": ('n', 'd', None, None, 
'"4\'-THIO-2\'4\'-DIDEOXYRIBOFURANOSE-5\'-PHOSPHATE"'), 48.504 -"SEP": ('s', 'p', 'M', None, 'PHOSPHOSERINE'), 48.505 -"LNT": ('x', 'p', None, None, 'N-[(2S)-2-AMINO-1,1-DIHYDROXY-4-METHYLPENTYL]-L-THREONINE'), 48.506 -"A39": ('a', 'r', 'M', None, "2'-O-METHYL-ADENOSINE 5'-MONOPHOSPHATE"), 48.507 -"DGI": ('g', 'd', 'M', None, '"2\'-DEOXYGUANOSINE-5\'-DIPHOSPHATE"'), 48.508 -"DGN": ('q', 'p', 'M', None, 'D-GLUTAMINE'), 48.509 -"DGL": ('e', 'p', 'M', None, '"D-GLUTAMIC'), 48.510 -"SEG": ('a', 'p', 'M', None, 'HYDROXYALANINE'), 48.511 -"ASB": ('d', 'p', 'M', None, '"ASPARTIC'), 48.512 -"ASA": ('d', 'p', 'M', None, '"ASPARTIC'), 48.513 -"SEC": ('a', 'p', 'M', None, '"2-AMINO-3-SELENINO-PROPIONIC'), 48.514 -"SEB": ('s', 'p', 'M', None, 'O-BENZYLSULFONYL-SERINE'), 48.515 -"0A9": ('f', 'p', 'M', None, '"METHYL'), 48.516 -"ASK": ('d', 'p', 'M', None, '"DEHYDROXYMETHYLASPARTIC'), 48.517 -"PVH": ('h', 'p', 'M', None, 'HISTIDINE-METHYL-ESTER'), 48.518 -"ASI": ('d', 'p', 'M', None, 'L-ISO-ASPARTATE'), 48.519 -"SEL": ('s', 'p', 'M', None, '2-AMINO-1,3-PROPANEDIOL'), 48.520 -"ASN": ('N', 'p', None, None, 'ASPARAGINE'), 48.521 -"ASM": ('x', 'p', None, None, '"2-AMINO-4-OXO-4(1H-PYRROL-1-YL)BUTANOIC'), 48.522 -"ASL": ('d', 'p', 'M', None, '"ASPARTIC'), 48.523 -"AS2": ('d', 'p', 'M', None, '"(2R)-2-AMINO-4-OXOBUTANOIC'), 48.524 -"IMC": ('c', 'd', 'M', None, '"N1-[2-DEOXY-RIBOFURANOSYL]-[2-AMINO-5-METHYL-4-OXO-4H-PYRIMIDINE]-5\'-MONOPHOSPHATE"'), 48.525 -"A3P": ('a', 'r', 'M', None, '"ADENOSINE-3\'-5\'-DIPHOSPHATE"'), 48.526 -"CLH": ('k', 'p', 'M', None, '"2-AMINO-6-[2-(2-OXO-ACETYLAMINO)-ACETYLAMINO]-HEXANOIC'), 48.527 -"3DR": ('n', 'd', None, None, '"1\',2\'-DIDEOXYRIBOFURANOSE-5\'-PHOSPHATE"'), 48.528 -"FRD": ('x', 'p', None, None, '1-PHENYL-2-AMINOPROPANE'), 48.529 -"CLD": ('a', 'p', 'M', None, '"D-PARA-CHLOROPHENYL-1-ACTEAMIDOBORONIC'), 48.530 -"CLE": ('l', 'p', 'M', None, '"LEUCINE'), 48.531 -"PDU": ('n', 'd', None, None, '"5(1-PROPYNYL)-2\'-DEOXYURIDINE-5-MONOPHOSPHATE"'), 
48.532 -"CLG": ('k', 'p', 'M', None, '"2-AMINO-6-[2-(2-AMINOOXY-ACETYLAMINO)-ACETYLAMINO]-HEXANOIC'), 48.533 -"BBC": ('c', 'p', 'M', None, '3-[(4-AMINOBUTYL)SULFINYL]-2-IMINOPROPAN-1-OL'), 48.534 -"TFO": ('a', 'd', 'M', None, '"[2-(6-AMINO-9H-PURIN-9-YL)-1-METHYLETHOXY]METHYLPHOSPHONIC'), 48.535 -"CLB": ('a', 'p', 'M', None, '"D-PARA-CHLOROPHENYL-1-ACETAMIDOBORONIC'), 48.536 -"TLC": ('t', 'd', 'M', None, '"2-O,3-ETHDIYL-ARABINOFURANOSYL-THYMINE-5\'-MONOPHOSPHATE"'), 48.537 -"2DM": ('n', 'd', None, None, '"2-HYDROXY-3-(PYREN-1-YLMETHOXY)PROPYL'), 48.538 -"A3A": ('a', 'd', 'M', None, '"2\'DEOXY-ALPHA-ANOMERIC-ADENOSINE-5\'-PHOSPHATE"'), 48.539 -"PDL": ('x', 'p', None, None, '"N-(5\'-PHOSPHOPYRIDOXYL)-L-ALANINE"'), 48.540 -"3DA": ('a', 'd', 'M', None, '"3\'-DEOXYADENOSINE-5\'-MONOPHOSPHATE"'), 48.541 -"GT9": ('c', 'p', 'M', None, 'S-NONYL-CYSTEINE'), 48.542 -"CLV": ('x', 'p', None, None, '"{(2S)-2-[(1S)-1-AMINOETHYL]-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'), 48.543 -"PDD": ('x', 'p', None, None, '"N-(5\'-PHOSPHOPYRIDOXYL)-D-ALANINE"'), 48.544 -"1TQ": ('w', 'p', 'M', None, '6-(FORMYLAMINO)-7-HYDROXY-L-TRYPTOPHAN'), 48.545 -"SEM": ('x', 'p', None, None, '3-AMINO-4-OXYBENZYL-2-BUTANONE'), 48.546 -"TGP": ('g', 'd', 'M', None, "5'-THIO-2'-DEOXY-GUANOSINE PHOSPHONIC ACID"), 48.547 -"OMC": ('c', 'r', 'M', None, '"O2\'-METHYLYCYTIDINE-5\'-MONOPHOSPHATE"'), 48.548 -"AEA": ('x', 'p', None, None, '"(2-AMINO-2-CARBAMOYL-ETHYLSULFANYL)-ACETIC'), 48.549 -"OMG": ('g', 'r', 'M', None, '"O2\'-METHYLGUANOSINE-5\'-MONOPHOSPHATE"'), 48.550 -"H2U": ('u', 'r', 'M', None, '"5,6-DIHYDROURIDINE-5\'-MONOPHOSPHATE"'), 48.551 -"A38": ('a', 'd', 'M', None, "8-OXY DEOXYADENOSINE-5'-MONOPHOSPHATE"), 48.552 -"DTY": ('y', 'p', 'M', None, 'D-TYROSINE'), 48.553 -"PVL": ('x', 'p', None, None, '"PYRUVOYL'), 48.554 -"ABA": ('a', 'p', 'M', None, '"ALPHA-AMINOBUTYRIC'), 48.555 -"OMU": ('u', 'r', 'M', None, "O2'-METHYLURIDINE 5'-MONOPHOSPHATE"), 48.556 -"OMT": ('m', 'p', 'M', None, 'S-DIOXYMETHIONINE'), 
48.557 -"CRF": ('x', 'p', None, None, '[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(1H-INDOL-3-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETALDEHYDE'), 48.558 -"FPA": ('f', 'p', 'M', None, '"1,1'), 48.559 -"VMS": ('x', 'p', None, None, '"5\'O-[N-(L-VALYL)SULPHAMOYL]ADENOSINE"'), 48.560 -"T11": ('f', 'p', 'M', None, '4-[3-(TRIFLUOROMETHYL)DIAZIRIDIN-3-YL]-L-PHENYLALANINE'), 48.561 -"2MR": ('r', 'p', 'M', None, '"N3,'), 48.562 -"DNE": ('l', 'p', 'M', None, 'D-NORLEUCINE'), 48.563 -"5IC": ('c', 'r', 'M', None, '"5-IODO-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.564 -"RC7": ('x', 'p', None, None, '{(2R)-4-(4-HYDROXYBENZYL)-2-[2-(1H-IMIDAZOL-4-YL)ETHYL]-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETALDEHYDE'), 48.565 -"BPE": ('c', 'p', 'M', None, '(2S)-2-AMINO-3-[(3-AMINOPROPYL)SULFANYL]PROPAN-1-OL'), 48.566 -"2MT": ('p', 'p', 'M', None, '"(4R)-2,2-DIMETHYL-1,3-THIAZOLIDINE-4-CARBOXYLIC'), 48.567 -"2MU": ('u', 'r', 'M', None, '"2\',5-DIMETHYLURIDINE-5\'-MONOPHOSPHATE"'), 48.568 -"DG": ('G', 'd', None, None, '"2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'), 48.569 -"UCL": ('n', 'd', None, None, "5-CHLORO-2'-DEOXYURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 48.570 -"DA": ('A', 'd', None, None, '"2\'-DEOXYADENOSINE-5\'-MONOPHOSPHATE"'), 48.571 -"AEI": ('d', 'p', 'M', None, '"THREONINE-ASPARTIC'), 48.572 -"N5M": ('c', 'r', 'M', None, "5-nitrocytidine 5'-(dihydrogen phosphate)"), 48.573 -"5IU": ('n', 'd', None, None, '"5-IODO-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'), 48.574 -"2MG": ('g', 'r', 'M', None, '"2N-METHYLGUANOSINE-5\'-MONOPHOSPHATE"'), 48.575 -"AAR": ('r', 'p', 'M', None, 'ARGININEAMIDE'), 48.576 -"DT": ('T', 'd', None, None, '"THYMIDINE-5\'-MONOPHOSPHATE"'), 48.577 -"DU": ('n', 'd', None, None, '"2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'), 48.578 -"HV8": ('x', 'p', None, None, 'BENZYL-2-AMINO-PARAMETHOXY-BENZYLSTATINE'), 48.579 -"MC1": ('s', 'p', 'M', None, '"METHICILLIN'), 48.580 -"2FM": ('m', 'p', 'M', None, 'S-(DIFLUOROMETHYL)HOMOCYSTEINE'), 48.581 -"CYS": ('C', 'p', None, None, 'CYSTEINE'), 48.582 -"CYR": 
('c', 'p', 'M', None, 'N~5~-[{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}(IMINIO)METHYL]-L-ORNITHINATE'), 48.583 -"BIL": ('x', 'p', None, None, '"(3R,4S)-3-AMINO-4-METHYLHEXANOIC'), 48.584 -"PCA": ('e', 'p', 'M', None, '"PYROGLUTAMIC'), 48.585 -"GYC": ('x', 'p', None, None, '"[(4Z)-2-[(1R)-1-AMINO-2-MERCAPTOETHYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.586 -"SMC": ('c', 'p', 'M', None, 'S-METHYLCYSTEINE'), 48.587 -"0AC": ('g', 'p', 'M', None, '"(4S,5S)-5-AMINO-4-HYDROXY-6-PHENYLHEXANOIC'), 48.588 -"BIF": ('f', 'p', 'M', None, '"(R)-2-AMINO-3-(4-PHENYLCYCLOHEXYL)PROPANOIC'), 48.589 -"2BU": ('a', 'd', 'M', None, "(2S,3S)-N6-(2,3,4-TRIHYDROXYBUTYL)-2'-DEOXYADENOSINE MONO PHOSPHORIC ACID"), 48.590 -"2BT": ('t', 'd', 'M', None, '"2\'-O-BUTYL-THYMIDINE"'), 48.591 -"5PC": ('c', 'd', 'M', None, '"5(1-PROPYNYL)-2\'-DEOXYCYTIDINE-5\'-MONOPHOSPHATE"'), 48.592 -"S2M": ('t', 'd', 'M', None, '"2\'-O-[2-(METHOXY)ETHYL]-2-THIOTHYMIDINE-5\'-MONOPHOSPHATE"'), 48.593 -"5MC": ('c', 'r', 'M', None, '"5-METHYLCYTIDINE-5\'-MONOPHOSPHATE"'), 48.594 -"S2P": ('a', 'p', 'M', None, '"(2S)-2-AMINO-3-(4-HYDROXY-1,2,5-THIADIAZOL-3-YL)PROPANOIC'), 48.595 -"C1X": ('k', 'p', 'M', None, '(Z)-N~6~-[(4R,5S)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)DIHYDRO-2H-THIOPYRAN-3(4H)-YLIDENE]-L-LYSINE'), 48.596 -"CYG": ('c', 'p', 'M', None, '"2-AMINO-4-(AMINO-3-OXO-PROPYLSULFANYLCARBONYL)-BUTYRIC'), 48.597 -"CYF": ('c', 'p', 'M', None, '"5-[2-(2-AMINO-2-CARBAMOYL-ETHYLSULFANYL)-ACETYLAMINO]-2-(3,6-DIHYDROXY-9,9A-DIHYDRO-3H-XANTHEN-9-YL)-BENZOIC'), 48.598 -"5PY": ('t', 'd', 'M', None, '"1-(2\'-DEOXY-5\'-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-METHYLPYRIMIDIN-2(1H)-ONE"'), 48.599 -"CYJ": ('x', 'p', None, None, '(Z)-N~6~-[(4R,5S)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)PIPERIDIN-3-YLIDENE]-L-LYSINE'), 48.600 -"BIU": ('i', 'p', 'M', None, '5-BROMO-L-ISOLEUCINE'), 48.601 -"ENP": ('n', 'r', None, None, 'ETHENO-NADP'), 48.602 -"AS9": ('x', 'p', None, None, 
'"N-[HYDROXY(METHYL)PHOSPHORYL]-L-ASPARTIC'), 48.603 -"CYM": ('c', 'p', 'M', None, 'S-METHYLCYSTEINE'), 48.604 -"CY3": ('c', 'p', 'M', None, '2-AMINO-3-MERCAPTO-PROPIONAMIDE'), 48.605 -"DHN": ('v', 'p', 'M', None, '"5-HYDROXY'), 48.606 -"CY1": ('c', 'p', 'M', None, 'ACETAMIDOMETHYLCYSTEINE'), 48.607 -"CY0": ('c', 'p', 'M', None, 'S-{3-[(4-ANILINOQUINAZOLIN-6-YL)AMINO]-3-OXOPROPYL}-L-CYSTEINE'), 48.608 -"SLZ": ('k', 'p', 'M', None, 'L-THIALYSINE'), 48.609 -"CY4": ('c', 'p', 'M', None, 'S-BUTYRYL-CYSTEIN'), 48.610 -"T5S": ('t', 'd', 'M', None, "2'-deoxy-5-(methylselanyl)uridine 5'-phosphate"), 48.611 -"SLR": ('p', 'p', 'M', None, '(3R,4R)-3-HYDROXY-2-[(1S)-1-HYDROXY-2-METHYLPROPYL]-4-METHYL-5-OXO-D-PROLINE'), 48.612 -"IML": ('i', 'p', 'M', None, 'N-METHYL-ISOLEUCINE'), 48.613 -"32S": ('x', 'p', None, None, '"(S)-2-AMINO-3-(4H-SELENOLO[3,2-B]-PYRROL-6-YL)-PROPIONIC'), 48.614 -"CSX": ('c', 'p', 'M', None, '"S-OXY'), 48.615 -"MFC": ('x', 'p', None, None, '5-[1-(3-FLUORO-4-HYDROXY-PHENYL)-METH-(Z)-YLIDENE]-3,5-DIHYDRO-IMIDAZOL-4-ONE'), 48.616 -"32T": ('x', 'p', None, None, '"(S)-2-AMINO-3-(4H-THIENO[3,2-B]-PYRROL-6-YL)-PROPIONIC'), 48.617 -"TFT": ('t', 'd', 'M', None, '"(L)-ALPHA-THREOFURANOSYL-THYMINE-3\'-MONOPHOSPHATE"'), 48.618 -"MLZ": ('k', 'p', 'M', None, 'N-METHYL-LYSINE'), 48.619 -"DHU": ('u', 'r', 'M', None, '"5,6-DIHYDROURIDINE-5\'-PHOSPHATE"'), 48.620 -"ASQ": ('d', 'p', 'M', None, 'PHOSPHOASPARTATE'), 48.621 -"SLA": ('p', 'p', 'M', None, '(3S,4R)-3-HYDROXY-2-[(1S)-1-HYDROXY-2-METHYLPROPYL]-4-METHYL-5-OXO-D-PROLINE'), 48.622 -"DHP": ('x', 'p', None, None, '3-DECYL-2,5-DIOXO-4-HYDROXY-3-PYRROLINE'), 48.623 -"E1X": ('a', 'd', 'M', None, '"PHOSPHORIC'), 48.624 -"XGL": ('g', 'd', 'M', None, '"[(1S,4R,6R)-6-HYDROXY-4-(GUANIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL'), 48.625 -"MCL": ('k', 'p', 'M', None, 'NZ-(1-CARBOXYETHYL)-LYSINE'), 48.626 -"TLB": ('n', 'r', None, None, '"2\'-O,3\'-C-METHYLENE-ARABINOFURANOSYL-THYMINE-5\'-MONOPHOSPHATE"'), 48.627 -"MCG": ('x', 'p', None, 
None, '(S)-(ALPHA)-METHYL-4-CARBOXYPHENYLGLYCINE'), 48.628 -"OTB": ('x', 'p', None, None, '"TERTIARY-BUTOXYFORMIC'), 48.629 -"FT6": ('w', 'p', 'M', None, '6-FLUORO-L-TRYPTOPHAN'), 48.630 -"XGA": ('n', 'd', None, None, '6-AMINO-3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-3,7-DIHYDRO-8H-IMIDAZO[4,5-G]QUINAZOLIN-8-ONE'), 48.631 -"AA4": ('a', 'p', 'M', None, '"2-AMINO-5-HYDROXYPENTANOIC'), 48.632 -"R": ('a', 'd', 'M', None, "2'-DEOXY-N6-(R)STYRENE OXIDE ADENOSINE MONOPHOSPHATE"), 48.633 -"GFL": ('g', 'd', 'M', None, '2-AMINO-9-(2-DEOXY-2-FLUORO-5-O-PHOSPHONO-BETA-D-ARABINOFURANOSYL)-1,9-DIHYDRO-6H-PURIN-6-ONE'), 48.634 -"10C": ('c', 'r', 'M', None, '4-AMINO-1-{2,5-ANHYDRO-4-[(PHOSPHONOOXY)METHYL]-ALPHA-L-LYXOFURANOSYL}PYRIMIDIN-2(1H)-ONE'), 48.635 -"NRQ": ('x', 'p', None, None, '"{(4Z)-4-(4-HYDROXYBENZYLIDENE)-2-[3-(METHYLTHIO)PROPANIMIDOYL]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'), 48.636 -"XGU": ('g', 'd', 'M', None, '2-AMINO-9-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-1,9-DIHYDRO-6H-PURIN-6-ONE'), 48.637 -"MCS": ('c', 'p', 'M', None, '"MALONYL'), 48.638 -"D3": ('n', 'd', None, None, '1-(2-DEOXY-BETA-D-RIBOFURANOSYL)-4-(3-BENZAMIDO)PHENYLIMIDAZOLE'), 48.639 -"PAQ": ('y', 'p', 'M', None, '2-OXY-4-HYDROXY-5-(2-HYDRAZINOPYRIDINE)PHENYLALANINE'), 48.640 -"2PR": ('g', 'd', 'M', None, '"2-AMINO-9-[2-DEOXYRIBOFURANOSYL]-9H-PURINE-5\'-MONOPHOSPHATE"'), 48.641 -"PAT": ('w', 'p', 'M', None, 'ALPHA-PHOSPHONO-TRYPTOPHAN'), 48.642 -"PAU": ('a', 'p', 'M', None, '"PANTOTHENOIC'), 48.643 -"CH": ('c', 'r', 'M', None, "N3-PROTONATED CYTIDINE-5'-MONOPHOSPHATE"), 48.644 -"GCM": ('x', 'p', None, None, '"GLYCYLMETHYLENE'), 48.645 -"CSZ": ('c', 'p', 'M', None, '"S-SELANYL'), 48.646 -"GMS": ('g', 'd', 'M', None, '"2\'-DEOXYGUANOSINE-5\'-MONOSELENOPHOSPHATE"'), 48.647 -"HBN": ('h', 'p', 'M', None, 'N-(2-NAPHTHYL)HISTIDINAMIDE'), 48.648 -"TQQ": ('w', 'p', 'M', None, '"(S)-2-AMINO-3-(6,7-DIHYDRO-6-IMINO-7-OXO-1H-INDOL-3-YL)PROPANOIC'), 48.649 -"2PI": ('x', 'p', 
None, None, '"2-AMINO-PENTANOIC'), 48.650 -"GSU": ('e', 'p', 'M', None, '"O5\'-(L-GLUTAMYL-SULFAMOYL)-ADENOSINE"'), 48.651 -"CSI": ('g', 'p', 'M', None, '"AMINO-(2-IMINO-HEXAHYDRO-PYRIMIDIN-4-YL)-ACETIC'), 48.652 -"200": ('f', 'p', 'M', None, '4-CHLORO-L-PHENYLALANINE'), 48.653 -"EXY": ('l', 'p', 'M', None, '6-[(2R)-OXIRAN-2-YL]-L-NORLEUCINE'), 48.654 -"12A": ('a', 'r', 'M', None, '"2-METHYLTHIO-N6-(AMINOCARBONYL-L-THREONYL)-ADENOSINE-5\'-MONOPHOSPHATE"'), 48.655 -"DCL": ('x', 'p', None, None, '2-AMINO-4-METHYL-PENTAN-1-OL'), 48.656 -"UNK": ('x', 'p', None, None, 'UNKNOWN'), 48.657 -"DCI": ('x', 'p', None, None, '2-METHYL-BUTYLAMINE'), 48.658 -"DCG": ('g', 'd', 'M', None, '"2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.659 -"4HT": ('w', 'p', 'M', None, '4-HYDROXYTRYPTOPHAN'), 48.660 -"CSU": ('c', 'p', 'M', None, '"CYSTEINE-S-SULFONIC'), 48.661 -"HSO": ('h', 'p', 'M', None, 'HISTIDINOL'), 48.662 -"HSL": ('s', 'p', 'M', None, '"HOMOSERINE'), 48.663 -"TLN": ('n', 'd', None, None, '"[(1R,3R,4R,7S)-7-HYDROXY-3-(THYMIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'), 48.664 -"TZB": ('x', 'p', None, None, '"(4S)-2-[(1E)-1-AMINOPROP-1-ENYL]-4,5-DIHYDRO-1,3-THIAZOLE-4-CARBOXYLIC'), 48.665 -"BTC": ('c', 'p', 'M', None, 'CYSTEINE'), 48.666 -"DCY": ('c', 'p', 'M', None, 'D-CYSTEINE'), 48.667 -"DCT": ('c', 'd', 'M', None, "2',3'-DIDEOXYCYTIDINE 5'-TRIPHOSPHATE"), 48.668 -"4BF": ('y', 'p', 'M', None, '4-BROMO-L-PHENYLALANINE'), 48.669 -"C2L": ('c', 'r', 'M', None, "5-METHYL-3'-O-METHOXYETHYL CYTIDINE 5'-MONOPHOSPHATE"), 48.670 -"RIA": ('a', 'r', 'M', None, '"2\'-O-[(5\'-PHOSPHO)RIBOSYL]ADENOSINE-5\'-MONOPHOSPHATE"'), 48.671 -"IAM": ('a', 'p', 'M', None, '4-[(ISOPROPYLAMINO)METHYL]PHENYLALANINE'), 48.672 -"B1F": ('f', 'p', 'M', None, '"PHENYLALANINE'), 48.673 -"GPN": ('g', 'd', 'M', None, '2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-GUANINE'), 48.674 -"C25": ('c', 'r', 'M', None, '"CYTIDINE-5\'-MONOPHOSPHATE"'), 48.675 -"N2C": ('x', 'p', None, None, 'N,S-DIMETHYLCYSTEINE'), 48.676 
-"HAC": ('a', 'p', 'M', None, 'BETA-CYCLOHEXYL-ALANINE'), 48.677 -"MEG": ('e', 'p', 'M', None, '"(2S,3R)-3-METHYL-GLUTAMIC'), 48.678 -"THO": ('x', 'p', None, None, '"REDUCED'), 48.679 -"BNO": ('x', 'p', None, None, '"NORLEUCINE'), 48.680 -"125": ('u', 'r', 'M', None, '"2\'-O-FLUOROETHYL-5-METHYL-URIDINE-5\'-MONOPHOSPHATE"'), 48.681 -"126": ('u', 'r', 'M', None, '"2\'-O-METHYL-[TRI(OXYETHYL)]-5-METHYL-URIDINE-5\'-MONOPHOSPHATE"'), 48.682 -"127": ('u', 'r', 'M', None, '"2\'-O-AMINOOXY-ETHYL-5-METHYL-URIDINE-5\'-MONOPHOSPHATE"'), 48.683 -"128": ('n', 'r', None, None, '"SPIRO(2,4,6-TRINITROBENZENE[1,2A]-2O\',3O\'-METHYLENE-ADENINE-TRIPHOSPHATE"'), 48.684 -"X": ('g', 'd', 'M', None, "2'-DEOXY-N7-(8,9-DIHYDRO-9-HYDROXY-10-DEHYDROXY-AFLATOXIN)GUANOSINE MONOPHOSPHATE"), 48.685 -"HS8": ('h', 'p', 'M', None, '3-(1-SULFO-1H-IMIDAZOL-3-IUM-4-YL)-L-ALANINE'), 48.686 -"NAM": ('a', 'p', 'M', None, '"NAM'), 48.687 -"THC": ('t', 'p', 'M', None, 'N-METHYLCARBONYLTHREONINE'), 48.688 -"HRP": ('w', 'p', 'M', None, '5-HYDROXY-L-TRYPTOPHAN'), 48.689 -"MNV": ('v', 'p', 'M', None, '"N-METHYL-C-AMINO'), 48.690 -"PHI": ('f', 'p', 'M', None, 'IODO-PHENYLALANINE'), 48.691 -"PHM": ('f', 'p', 'M', None, 'PHENYLALANYLMETHANE'), 48.692 -"PHL": ('f', 'p', 'M', None, 'L-PHENYLALANINOL'), 48.693 -"CSW": ('c', 'p', 'M', None, 'CYSTEINE-S-DIOXIDE'), 48.694 -"ZAD": ('a', 'r', 'M', None, '"(S)-1\'-(2\',3\'-DIHYDROXYPROPYL)-ADENINE"'), 48.695 -"OIC": ('x', 'p', None, None, '"OCTAHYDROINDOLE-2-CARBOXYLIC'), 48.696 -"PHE": ('F', 'p', None, None, 'PHENYLALANINE'), 48.697 -"PHD": ('d', 'p', 'M', None, '"ASPARTYL'), 48.698 -"SYS": ('c', 'p', 'M', None, '3-[(2-AMINO-2-OXOETHYL)SELANYL]-L-ALANINE'), 48.699 -"BG1": ('s', 'p', 'M', None, 'O-[(2S)-2-{METHYL[(METHYLAMINO)SULFONYL]AMINO}PENTANOYL]-L-SERINE'), 48.700 -"NIY": ('y', 'p', 'M', None, 'META-NITRO-TYROSINE'), 48.701 -"MTR": ('t', 'd', 'M', None, '(5-METHYL-6-OXO-1,6-DIHYDRO-PYRIDIN-3-YL)-1,2-DIDEOXY-RIBOFURANOSE-5-MONOPHOSPHATE'), 48.702 -"OAS": ('s', 'p', 
'M', None, 'O-ACETYLSERINE'), 48.703 -"BRU": ('n', 'd', None, None, '"5-BROMO-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'), 48.704 -"NIT": ('x', 'p', None, None, '4-NITROANILINE'), 48.705 -"OIP": ('n', 'd', None, None, "2'-DEOXY-INOSINIC ACID"), 48.706 -"0A0": ('d', 'p', 'M', None, '"2-METHYL-L-ASPARTIC'), 48.707 -"0A1": ('y', 'p', 'M', None, 'O-METHYL-L-TYROSINE'), 48.708 -"0A2": ('k', 'p', 'M', None, '[(1R)-1,5-DIAMINOPENTYL][BIS(ETHANOLATO)]HYDROXYBORATE(1-)'), 48.709 -"CRQ": ('x', 'p', None, None, '"[2-(3-CARBAMOYL-1-IMINO-PROPYL)-4-(4-HYDROXY-BENZYLIDENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETIC'), 48.710 -"CRW": ('x', 'p', None, None, '"[2-(1-AMINOETHYL)-4-METHYLENE-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.711 -"CRU": ('e', 'p', 'M', None, '"4-[(4Z)-1-(CARBOXYMETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-2-YL]-4-IMINOBUTANOIC'), 48.712 -"DMH": ('n', 'p', 'M', None, 'N4,N4-DIMETHYL-ASPARAGINE'), 48.713 -"PRR": ('a', 'p', 'M', None, '3-(METHYL-PYRIDINIUM)ALANINE'), 48.714 -"CRX": ('x', 'p', None, None, '"[2-(1-AMINOETHYL)-2-HYDROXY-4-METHYLENE-5-OXOIMIDAZOLIDIN-1-YL]ACETIC'), 48.715 -"DMK": ('d', 'p', 'M', None, '"3,3-DIMETHYL'), 48.716 -"MHL": ('l', 'p', 'M', None, 'N-METHYL-4-HYDROXY-LEUCINE'), 48.717 -"LAA": ('d', 'p', 'M', None, '(3R)-3-HYDROXY-L-ALPHA-ASPARAGINE'), 48.718 -"0ZM": ('x', 'p', None, None, '"(2R)-2-{[(2R)-2-{[(S)-HYDROXY{[(2R,3S,4S,5R,6R)-3,4,5-TRIHYDROXY-6-METHYLTETRAHYDRO-2H-PYRAN-2-YL]OXY}PHOSPHORYL]AMINO}-4-METHYLPENTANOYL]AMINO}-3-(1H-INDOL-3-YL)PROPANOIC'), 48.719 -"DMT": ('x', 'p', None, None, '"3-HYDROXY-4,4-DIMETHYL-2-(METHYLAMINO)-6-OCTENOIC'), 48.720 -"CRG": ('x', 'p', None, None, '"[2-(1-AMINO-2-HYDROXY-PROPYL)-4-(3H-IMIDAZOL-4-YLMETHYLENE)-5-OXO-4,5-DIHYDRO-IMIDAZOL-1-YL]-ACETIC'), 48.721 -"0ZJ": ('x', 'p', None, None, 'N-(SULFANYLACETYL)-D-PHENYLALANYL-N-[(1S)-4-CARBAMIMIDAMIDO-1-(CHLOROACETYL)BUTYL]-L-PROLINAMIDE'), 48.722 -"0ZE": ('x', 'p', None, None, 
'"AMINO{[(4R)-4-({[(3R,6S,8AS)-6-AMINO-6-BENZYL-5-OXOOCTAHYDROINDOLIZIN-3-YL]CARBONYL}AMINO)-5-(1,3-BENZOTHIAZOL-2-YL)-5-OXOPENTYL]AMINO}METHANIMINIUM'), 48.723 -"CRK": ('x', 'p', None, None, '4-{(Z)-[2-[3-(METHYLSULFANYL)PROPANOYL]-5-OXO-1-(2-OXOETHYL)-1,5-DIHYDRO-4H-IMIDAZOL-4-YLIDENE]METHYL}BENZENOLATE'), 48.724 -"TPH": ('x', 'p', None, None, '"2-AMINO-3-PHENYL-PROPANE-1,1-DIOL'), 48.725 -"1PI": ('x', 'p', None, None, '3-(1-CARBAMIMIDOYL-PIPERIDIN-3-YL)-L-ALANINE'), 48.726 -"CRO": ('x', 'p', None, None, '"{2-[(1R,2R)-1-AMINO-2-HYDROXYPROPYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'), 48.727 -"0ZC": ('x', 'p', None, None, '"(3R)-2-[N-(FURAN-2-YLCARBONYL)-L-LEUCYL]-2,3,4,9-TETRAHYDRO-1H-BETA-CARBOLINE-3-CARBOXYLIC'), 48.728 -"MIS": ('s', 'p', 'M', None, 'MONOISOPROPYLPHOSPHORYLSERINE'), 48.729 -"C36": ('c', 'd', 'M', None, '"5-METHYL-5-FLUORO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.730 -"NFA": ('f', 'p', 'M', None, '"PHENYLALANINE'), 48.731 -"PU": ('a', 'r', 'M', None, '"PUROMYCIN-N-AMINOPHOSPHONIC'), 48.732 -"G7M": ('g', 'r', 'M', None, '"N7-METHYL-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.733 -"S6G": ('g', 'd', 'M', None, '"6-THIO-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'), 48.734 -"LDH": ('k', 'p', 'M', None, 'N~6~-ETHYL-L-LYSINE'), 48.735 -"TCP": ('t', 'd', 'M', None, '"5\'-METHYLTHYMIDINE"'), 48.736 -"TCQ": ('x', 'p', None, None, '3-((3E)-4-(HYDROXYMETHYL)-6-OXO-3-{[(1S,2R)-2-PHENYLCYCLOPROPYL]IMINO}CYCLOHEXA-1,4-DIEN-1-YL)ALANINE'), 48.737 -"8AN": ('a', 'r', 'M', None, "3'-amino-3'-deoxyadenosine 5'-(dihydrogen phosphate)"), 48.738 -"BUC": ('c', 'p', 'M', None, 'S,S-BUTYLTHIOCYSTEINE'), 48.739 -"C32": ('c', 'd', 'M', None, '"5-BROMO-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.740 -"7MG": ('g', 'r', 'M', None, '"7N-METHYL-8-HYDROGUANOSINE-5\'-MONOPHOSPHATE"'), 48.741 -"BUG": ('l', 'p', 'M', None, '"TERT-LEUCYL'), 48.742 -"LEU": ('L', 'p', None, None, 'LEUCINE'), 48.743 -"MHO": ('m', 'p', 'M', None, 'S-OXYMETHIONINE'), 48.744 -"C": ('C', 
'r', None, None, '"CYTIDINE-5\'-MONOPHOSPHATE"'), 48.745 -"DDX": ('n', 'd', None, None, '"2\',3\'-DEHYDRO-2\',3\'-DIDEOXYRIBOFURANOSE-5\'-PHOSPHATE"'), 48.746 -"OPR": ('r', 'p', 'M', None, 'C-(3-OXOPROPYL)ARGININE'), 48.747 -"FOX": ('g', 'd', 'M', None, "((1R,2S,4R)-4-{[2-AMINO-5-(FORMYLAMINO)-6-OXO-3,6-DIHYDROPYRIMIDIN-4-YL]AMINO}-2-HYDROXYCYCLOPENTYL)METHYL 5'-PHOSPHATE"), 48.748 -"MTY": ('y', 'p', 'M', None, 'META-TYROSINE'), 48.749 -"TC1": ('c', 'd', 'M', None, '3-(5-PHOSPHO-2-DEOXY-BETA-D-RIBOFURANOSYL)-2-OXO-1,3-DIAZA-PHENOTHIAZINE'), 48.750 -"FOE": ('c', 'p', 'M', None, '2-(2-AMINO-3-OXO-PROPYLSULFANYL)-N-(4-FLUORO-PHENYL)-N-ISOPROPYL-ACETAMIDE'), 48.751 -"MF3": ('x', 'p', None, None, '"2-AMINO-4-TRIFLUOROMETHYLSULFANYL-BUTYRIC'), 48.752 -"DDN": ('n', 'd', None, None, '"3,4-DIHYDRO-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'), 48.753 -"FME": ('m', 'p', 'M', None, 'N-FORMYLMETHIONINE'), 48.754 -"EFC": ('c', 'p', 'M', None, 'S,S-(2-FLUOROETHYL)THIOCYSTEINE'), 48.755 -"DDG": ('g', 'd', 'M', None, '"2\',3\'-DIDEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.756 -"MHS": ('h', 'p', 'M', None, '"N1-METHYLATED'), 48.757 -"DDE": ('h', 'p', 'M', None, '{3-[4-(2-AMINO-2-CARBOXY-ETHYL)-1H-IMIDAZOL-2-YL]-1-CARBAMOYL-PROPYL}-TRIMETHYL-AMMONIUM'), 48.758 -"CR2": ('x', 'p', None, None, '"{(4Z)-2-(AMINOMETHYL)-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'), 48.759 -"DSN": ('s', 'p', 'M', None, 'D-SERINE'), 48.760 -"PSH": ('h', 'p', 'M', None, '1-THIOPHOSPHONO-L-HISTIDINE'), 48.761 -"6MI": ('n', 'd', None, None, '6-METHYL-8-(2-DEOXY-RIBOFURANOSYL)ISOXANTHOPTERIDINE'), 48.762 -"CR7": ('x', 'p', None, None, '"[(4Z)-2-[(1S)-1,5-DIAMINOPENTYL]-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.763 -"0AV": ('a', 'd', 'M', None, "2'-O-methyladenosine 5'-(dihydrogen phosphate)"), 48.764 -"CR5": ('g', 'p', 'M', None, '(2R)-2-(AMINOMETHYL)-2,4-DIHYDROXY-5-OXO-3-(2-OXOETHYL)-2,5-DIHYDRO-1H-IMIDAZOL-3-IUM'), 48.765 -"MOR": ('x', 'p', None, None, 
'N-CARBONYLMORPHOLINE'), 48.766 -"CR8": ('x', 'p', None, None, '2-[1-AMINO-2-(1H-IMIDAZOL-5-YL)ETHYL]-1-(CARBOXYMETHYL)-4-[(4-OXOCYCLOHEXA-2,5-DIEN-1-YLIDENE)METHYL]-1H-IMIDAZOL-5-OLATE'), 48.767 -"PSA": ('f', 'p', 'M', None, '"3-HYDROXY-4-AMINO-5-PHENYLPENTANOIC'), 48.768 -"ANI": ('x', 'p', None, None, '4-TRIFLUOROMETHYLANILINE'), 48.769 -"NP3": ('n', 'd', None, None, '"1-[2-DEOXY-RIBOFURANOSYL]-1H-[3-NITRO-PYRROL]-5\'-PHOSPHATE"'), 48.770 -"DM0": ('k', 'p', 'M', None, 'N~2~,N~2~,N~6~,N~6~-TETRAMETHYL-L-LYSINE'), 48.771 -"0AA": ('v', 'p', 'M', None, '"METHYL'), 48.772 -"0AB": ('v', 'p', 'M', None, '(3S,4S)-3-AMINO-4-METHYL-3,4-DIHYDRO-2H-PYRAN-2-ONE'), 48.773 -"FOG": ('f', 'p', 'M', None, 'PHENYLALANINOYL-[1-HYDROXY]-2-PROPYLENE'), 48.774 -"0AD": ('g', 'd', 'M', None, "2'-deoxy-N-propylguanosine 5'-(dihydrogen phosphate)"), 48.775 -"LCC": ('n', 'd', None, None, '"[(1R,3R,4R,7S)-7-HYDROXY-3-(5-METHYLCYTOSIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'), 48.776 -"NVA": ('v', 'p', 'M', None, 'NORVALINE'), 48.777 -"0AG": ('l', 'p', 'M', None, 'N-(ETHOXYCARBONYL)-L-LEUCINE'), 48.778 -"0AH": ('s', 'p', 'M', None, 'O-(BROMOACETYL)-L-SERINE'), 48.779 -"1MA": ('a', 'r', 'M', None, '"6-HYDRO-1-METHYLADENOSINE-5\'-MONOPHOSPHATE"'), 48.780 -"0AK": ('d', 'p', 'M', None, '"(2S)-2-AMINO-4-(2-CHLOROETHOXY)-4-OXOBUTANOIC'), 48.781 -"0AM": ('a', 'd', 'M', None, "2'-deoxy-N-[3-(propyldisulfanyl)propyl]adenosine 5'-(dihydrogen phosphate)"), 48.782 -"PST": ('t', 'd', 'M', None, '"THYMIDINE-5\'-THIOPHOSPHATE"'), 48.783 -"PSU": ('u', 'r', 'M', None, '"PSEUDOURIDINE-5\'-MONOPHOSPHATE"'), 48.784 -"1ZX": ('x', 'p', None, None, 'D-PHENYLALANYL-N-[(1S)-1-ACETYL-4-{[AMINO(IMINIO)METHYL]AMINO}BUTYL]-L-PROLINAMIDE'), 48.785 -"ILG": ('e', 'p', 'M', None, '"GLUTAMYL'), 48.786 -"ILE": ('I', 'p', None, None, 'ISOLEUCINE'), 48.787 -"5CM": ('c', 'd', 'M', None, '"5-METHYL-2\'-DEOXY-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.788 -"D11": ('t', 'p', 'M', None, 'D-PHOSHPHOTHREONINE'), 48.789 -"UMS": ('n', 'd', 
None, None, '"2\'-METHYLSELENYL-2\'-DEOXYURIDINE-5\'-PHOSPHATE"'), 48.790 -"X9Q": ('x', 'p', None, None, '"{(2S)-2-[(1S)-1-AMINOETHYL]-4-BENZYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'), 48.791 -"FGP": ('s', 'p', 'M', None, '"2-AMINO-3-HYDROXY-3-PHOSPHONOOXY-PROPIONIC'), 48.792 -"DVA": ('v', 'p', 'M', None, 'D-VALINE'), 48.793 -"5ZA": ('x', 'p', None, None, '(5Z)-2-[(1S,2R)-1-AMINO-2-HYDROXYPROPYL]-5-[(4-AMINO-1H-INDOL-3-YL)METHYLENE]-3-(2-HYDROXYETHYL)-3,5-DIHYDRO-4H-IMIDAZOL-4-ONE'), 48.794 -"BCC": ('x', 'p', None, None, '6-AMINO-6-BENZYL-5-OXO-OCTAHYDRO-INDOLIZINE-3-CARBALDEHYDE'), 48.795 -"TMB": ('t', 'p', 'M', None, 'N-METHYL-4-[(E)-2-BUTENYL]-4,N-DIMETHYL-THREONINE'), 48.796 -"3AH": ('h', 'p', 'M', None, '[HISTIDIN-1-YL-4H-[1,2,4]TRIAZOL-5-YL]-AMINE'), 48.797 -"BCX": ('c', 'p', 'M', None, 'BETA-3-CYSTEINE'), 48.798 -"KPI": ('k', 'p', 'M', None, '"(2S)-2-AMINO-6-[(1-HYDROXY-1-OXO-PROPAN-2-YLIDENE)AMINO]HEXANOIC'), 48.799 -"A2L": ('a', 'r', 'M', None, "3'-O-METHYOXYETHYL-ADENOSINE 5'-MONOPHOSPHATE"), 48.800 -"A2M": ('a', 'r', 'M', None, '"2\'-O-METHYL-ADENOSINE-5\'-MONOPHOSPHATE"'), 48.801 -"ILX": ('i', 'p', 'M', None, '4,5-DIHYDROXYISOLEUCINE'), 48.802 -"PEC": ('c', 'p', 'M', None, 'S,S-PENTYLTHIOCYSTEINE'), 48.803 -"IGL": ('g', 'p', 'M', None, '"ALPHA-AMINO-2-INDANACETIC'), 48.804 -"OLE": ('x', 'p', None, None, '"2-HYDROXY-4-METHYL-PENTANOIC'), 48.805 -"LPG": ('g', 'p', 'M', None, 'L-PROPARGYLGLYCINE'), 48.806 -"DI": ('n', 'd', None, None, '"2\'-DEOXYINOSINE-5\'-MONOPHOSPHATE"'), 48.807 -"ALO": ('t', 'p', 'M', None, 'ALLO-THREONINE'), 48.808 -"OLT": ('t', 'p', 'M', None, 'O-METHYL-L-THREONINE'), 48.809 -"TBM": ('t', 'p', 'M', None, '4-METHYL-4-[(E)-2-BUTENYL]-4,N-DIMETHYL-THREONINE'), 48.810 -"2AU": ('u', 'r', 'M', None, '"2\'-AMINOURIDINE"'), 48.811 -"LED": ('l', 'p', 'M', None, '(4R)-5-OXO-L-LEUCINE'), 48.812 -"OLZ": ('s', 'p', 'M', None, 'O-(2-AMINOETHYL)-L-SERINE'), 48.813 -"C6C": ('c', 'p', 'M', None, '"S-CYCLOHEXYL'), 48.814 -"IEY": ('x', 'p', None, 
None, '2-((1E)-2-(5-IMIDAZOLYL)ETHENYL)-4-(P-HYDROXYBENZYLIDENE)-5-IMIDAZOLINONE'), 48.815 -"HEU": ('n', 'd', None, None, '"3-(2-HYDROXYETHYL)-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'), 48.816 -"SRA": ('a', 'r', 'M', None, "ADENOSINE -5'-THIO-MONOPHOSPHATE"), 48.817 -"ZTH": ('n', 'r', None, None, '"(S)-1\'-(2\',3\'-DIHYDROXYPROPYL)-THYMINE"'), 48.818 -"NDN": ('n', 'd', None, None, "2'-DEOXY-5-NITROURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 48.819 -"C66": ('x', 'p', None, None, '2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-CYTOSINE'), 48.820 -"6CW": ('w', 'p', 'M', None, '6-CHLORO-L-TRYPTOPHAN'), 48.821 -"CR0": ('x', 'p', None, None, '[2-(1-AMINO-2-HYDROXYPROPYL)-2-HYDROXY-4-ISOBUTYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETALDEHYDE'), 48.822 -"DBM": ('n', 'r', None, None, '9-(6-DEOXY-BETA-D-ALLOFURANOSYL)-6-METHYLPURINE'), 48.823 -"N": ('n', 'r', None, None, "ANY 5'-MONOPHOSPHATE NUCLEOTIDE"), 48.824 -"N6G": ('g', 'r', 'M', None, '"((2R,3S,4R,5S)-5-(2,6-DIAMINO-9H-PURIN-9-YL)-3,4-DIHYDROXY-TETRAHYDROFURAN-2-YL)METHYL'), 48.825 -"ALC": ('a', 'p', 'M', None, '"2-AMINO-3-CYCLOHEXYL-PROPIONIC'), 48.826 -"S1H": ('s', 'p', 'M', None, '1-HEXADECANOSULFONYL-O-L-SERINE'), 48.827 -"FZN": ('k', 'p', 'M', None, '(2S)-2-amino-6-{[(1Z)-1-{[(2R,3R,4S,5R)-5-({[(R)-{[(R)-{[(2R,3S,4R,5R)-5-(6-amino-9H-purin-9-yl)-3,4-dihydroxytetrahydrofuran-2-yl]methoxy}(hydroxy)phosphoryl]oxy}(hydroxy)phosphoryl]oxy}methyl)-3,4-dihydroxytetrahydrofuran-2-yl]sulfanyl}ethylidene]amino}hexanoic acid'), 48.828 -"0SP": ('a', 'd', 'M', None, "2'-deoxy-N-[3-(propyldisulfanyl)propyl]adenosine 5'-(dihydrogen phosphate)"), 48.829 -"OBS": ('x', 'p', None, None, '(Z)-N^6-[(4S,5R)-5-(2-CARBOXYETHYL)-4-(CARBOXYMETHYL)-1-HYDROXYDIHYDRO-2H-THIOPYRANIUM-3(4H)-YLIDENE]-L-LYSINE'), 48.830 -"2NT": ('t', 'd', 'M', None, "2'-O-[2-[HYDROXY(METHYLENEAMINO)OXY]ETHYL THYMIDINE-5'-MONOPHOSPHATE"), 48.831 -"K1R": ('c', 'p', 'M', None, '"(2S)-2-AMINO-4-[({[(2R)-2-AMINO-2-CARBOXYETHYL]THIO}AMINO)SULFINYL]BUTANOIC'), 48.832 -"NMT": ('t', 'd', 'M', 
None, '1-(O2-(METHYLCARBAMOYL)-O5-HYDROXYPHOSPHINYL-BETA-D-RIBOFURANOSYL)THYMINE'), 48.833 -"A23": ('a', 'r', 'M', None, "ADENOSINE-5'-PHOSPHATE-2',3'-CYCLIC PHOSPHATE"), 48.834 -"D1P": ('n', 'd', None, None, '"2\'-DEOXY-RIBOFURANOSE-5\'-PHOSPHATE"'), 48.835 -"NMS": ('t', 'd', 'M', None, '1-(O2-(2-METHYLAMINO-2-OXO-ETHYL)-O5-HYDROXYPHOSPHINYL-BETA-D-RIBOFURANOSYL)THYMINE'), 48.836 -"CIR": ('r', 'p', 'M', None, 'CITRULLINE'), 48.837 -"CH6": ('x', 'p', None, None, '"{(4Z)-2-[(1S)-1-AMINO-3-(METHYLSULFANYL)PROPYL]-4-[(4-HYDROXYPHENYL)METHYLIDENE]-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL}ACETIC'), 48.838 -"BJH": ('x', 'p', None, None, '"1(R)-1-ACETAMIDO-2-(3-CARBOXY-2-HYDROXYPHENYL)ETHYL'), 48.839 -"YYG": ('g', 'r', 'M', None, '"4-(3-[5-O-PHOSPHONORIBOFURANOSYL]-4,6-DIMETHYL-8-OXO-4,8-DIHYDRO-3H-1,3,4,5,7A-PENTAAZA-S-INDACEN-YLAMINO-BUTYRIC'), 48.840 -"2MA": ('a', 'r', 'M', None, '"2-METHYLADENOSINE-5\'-MONOPHOSPHATE"'), 48.841 -"NMC": ('g', 'p', 'M', None, '"N-CYCLOPROPYLMETHYL'), 48.842 -"0AY": ('x', 'p', None, None, '"DIETHYL'), 48.843 -"UFP": ('n', 'd', None, None, '"5-FLUORO-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'), 48.844 -"SET": ('s', 'p', 'M', None, 'AMINOSERINE'), 48.845 -"UFR": ('n', 'd', None, None, "2'-DEOXY-5-FORMYLURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 48.846 -"NMM": ('r', 'p', 'M', None, '"(R)-2-AMINO-5-(3-METHYLGUANIDINO)BUTANOIC'), 48.847 -"UFT": ('n', 'd', None, None, "2'-deoxy-2'-fluorouridine 5'-(dihydrogen phosphate)"), 48.848 -"0AZ": ('p', 'p', 'M', None, '(4R)-4-HYDROXY-L-PROLINE'), 48.849 -"5IT": ('t', 'd', 'M', None, '"5-IODO-THYMIDINE-5\'-PHOSPHATE"'), 48.850 -"DIL": ('i', 'p', 'M', None, 'D-ISOLEUCINE'), 48.851 -"FHL": ('x', 'p', None, None, '(E)-N~6~-[3-CARBOXY-1-(HYDROXYMETHYL)PROPYLIDENE]-L-LYSINE'), 48.852 -"0Z6": ('x', 'p', None, None, 'D-PHENYLALANYL-N-[(1S)-4-{[AMINO(IMINIO)METHYL]AMINO}-1-(CHLOROACETYL)BUTYL]-L-PHENYLALANINAMIDE'), 48.853 -"AR4": ('e', 'p', 'M', None, 
'"2-AMINO-5-(3-FLUORO-3,4-DIHYDROXY-5-HYDROXYMETHYL-TETRAHYDRO-FURAN-2-YLOXY)-5-HYDROXY-PENTANOIC'), 48.854 -"EIT": ('t', 'd', 'M', None, '"((3R,4R,5R)-4-(2-(1H-IMIDAZOL-1-YL)ETHOXY)-3-HYDROXY-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)-TETRAHYDROFURAN-2-YL)METHYL'), 48.855 -"SD2": ('x', 'p', None, None, 'N-(SULFANYLACETYL)TYROSYLPROLYLMETHIONINAMIDE'), 48.856 -"CH7": ('x', 'p', None, None, '"[(4Z)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-2-(3,4,5,6-TETRAHYDROPYRIDIN-2-YL)-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.857 -"N5I": ('n', 'd', None, None, '1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-5-NITRO-1H-INDOLE'), 48.858 -"5AT": ('t', 'd', 'M', None, '"5\'-AMINO-5\'-DEOXYTHYMIDINE"'), 48.859 -"LOL": ('x', 'p', None, None, 'LEUCINOL'), 48.860 -"HYP": ('p', 'p', 'M', None, '4-HYDROXYPROLINE'), 48.861 -"IYT": ('t', 'p', 'M', None, 'N-ALPHA-ACETYL-3,5-DIIODOTYROSYL-D-THREONINE'), 48.862 -"LOV": ('x', 'p', None, None, '"5-AMINO-4-HYDROXY-2-ISOPROPYL-7-METHYL-OCTANOIC'), 48.863 -"LCX": ('k', 'p', 'M', None, '"CARBOXYLATED'), 48.864 -"GND": ('x', 'p', None, None, '"2-AMINO-5-GUANIDINO-PENTANOIC'), 48.865 -"GNE": ('n', 'd', None, None, '1,N2-ETHENOGUANINE'), 48.866 -"FHU": ('u', 'r', 'M', None, '"(5S,6R)-5-FLUORO-6-HYDROXY-PSEUDOURIDINE-5\'-MONOPHOSPHATE"'), 48.867 -"C12": ('x', 'p', None, None, '2-(1-AMINO-2-HYDROXYPROPYL)-4-(4-HYDROXYBENZYL)-1-(2-OXOETHYL)-1H-IMIDAZOL-5-OLATE'), 48.868 -"DIV": ('v', 'p', 'M', None, 'D-ISOVALINE'), 48.869 -"T6A": ('a', 'r', 'M', None, '"N-[N-(9-B-D-RIBOFURANOSYLPURIN-6-YL)CARBAMOYL]THREONINE-5\'-MONOPHOSPHATE"'), 48.870 -"DIR": ('r', 'p', 'M', None, '3-{[(E)-AMINO(HYDROXYIMINO)METHYL]AMINO}ALANINE'), 48.871 -"AIB": ('a', 'p', 'M', None, '"ALPHA-AMINOISOBUTYRIC'), 48.872 -"SOC": ('c', 'p', 'M', None, 'DIOXYSELENOCYSTEINE'), 48.873 -"NSK": ('x', 'p', None, None, '"N-SUCCINYL'), 48.874 -"LCG": ('g', 'd', 'M', None, '"[(1R,3R,4R,7S)-7-HYDROXY-3-(GUANIN-9-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'), 48.875 -"MT2": ('m', 'p', 'M', 
None, '[(3S)-3-AMINO-3-CARBOXYPROPYL](ETHYL)METHYLSULFONIUM'), 48.876 -"GAU": ('e', 'p', 'M', None, '"(4S)-4-AMINO-5-HYDROXYPENTANOIC'), 48.877 -"PPH": ('l', 'p', 'M', None, '"PHENYLALANINE'), 48.878 -"PPN": ('f', 'p', 'M', None, 'PARA-NITROPHENYLALANINE'), 48.879 -"1MG": ('g', 'r', 'M', None, '"1N-METHYLGUANOSINE-5\'-MONOPHOSPHATE"'), 48.880 -"GAO": ('g', 'r', 'M', None, "GUANINE ARABINOSE-5'-PHOSPHATE"), 48.881 -"FAI": ('n', 'r', None, None, '5-(FORMYLAMINO)-1-(5-O-PHOSPHONO-BETA-D-RIBOFURANOSYL)-1H-IMIDAZOLE-4-CARBOXAMIDE'), 48.882 -"PPW": ('g', 'd', 'M', None, '"7-DEAZA-8-AZA-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'), 48.883 -"2ML": ('l', 'p', 'M', None, '2-METHYLLEUCINE'), 48.884 -"MBZ": ('n', 'd', None, None, '"1-[2-DEOXYRIBOFURANOSYL]-4-METHYL-BENZOIMIDAZOLE-5\'-MONOPHOSPHATE"'), 48.885 -"TPN": ('t', 'd', 'M', None, '2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE'), 48.886 -"SOY": ('s', 'p', 'M', None, '"OXACILLIN-ACYLATED'), 48.887 -"CBV": ('c', 'r', 'M', None, "5-BROMOCYTIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 48.888 -"MBQ": ('y', 'p', 'M', None, '2-HYDROXY-5-({1-[(4-METHYLPHENOXY)METHYL]-3-OXOPROP-1-ENYL}AMINO)-L-TYROSINE'), 48.889 -"G31": ('g', 'd', 'M', None, '"3\'-METHYL-2\',3\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.890 -"HTI": ('c', 'p', 'M', None, '(4S)-4-{[(2S)-2-AMINO-3-OXOPROPYL]SULFANYL}-L-HOMOSERINE'), 48.891 -"XX1": ('k', 'p', 'M', None, 'N~6~-7H-PURIN-6-YL-L-LYSINE'), 48.892 -"6OG": ('g', 'd', 'M', None, "6-O-METHYL GUANOSINE-5'-MONOPHOSPHATE"), 48.893 -"0AF": ('w', 'p', 'M', None, '7-HYDROXY-L-TRYPTOPHAN'), 48.894 -"G36": ('g', 'd', 'M', None, '"O6-ETHYL-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'), 48.895 -"I": ('n', 'r', None, None, '"INOSINIC'), 48.896 -"YOF": ('y', 'p', 'M', None, '3-FLUOROTYROSINE'), 48.897 -"HPQ": ('f', 'p', 'M', None, 'HOMOPHENYLALANINYLMETHANE'), 48.898 -"LCA": ('a', 'r', 'M', None, '"[(1R,3R,4R,7S)-7-HYDROXY-3-(ADENIN-9-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'), 48.899 -"IU": ('u', 'r', 'M', None, 
'"5-IODOURIDINE-5\'-MONOPHOSPHATE"'), 48.900 -"CYQ": ('c', 'p', 'M', None, '"2-AMINO-3-PHOSPHONOMETHYLSULFANYL-PROPIONIC'), 48.901 -"M0H": ('c', 'p', 'M', None, 'S-(HYDROXYMETHYL)-L-CYSTEINE'), 48.902 -"FA5": ('n', 'r', None, None, '"ADENOSINE-5\'-[PHENYLALANINYL-PHOSPHATE]"'), 48.903 -"LVG": ('g', 'p', 'M', None, 'L-VINYLGLYCINE'), 48.904 -"HPH": ('x', 'p', None, None, 'PHENYLALANINDIOL'), 48.905 -"IRN": ('n', 'r', None, None, '1-(5-O-PHOSPHONO-BETA-D-RIBOFURANOSYL)-1H-IMIDAZOLE'), 48.906 -"Y": ('a', 'd', 'M', None, "2'-DEOXY-N6-(S)STYRENE OXIDE ADENOSINE MONOPHOSPHATE"), 48.907 -"IC": ('c', 'r', 'M', None, '"ISOCYTIDINE-5\'-MONOPHOSPHATE"'), 48.908 -"4DP": ('w', 'p', 'M', None, '3-[5-(DIMETHYLAMINO)-1,3-DIOXO-1,3-DIHYDRO-2H-ISOINDOL-2-YL]-L-ALANINE'), 48.909 -"HPE": ('f', 'p', 'M', None, 'HOMOPHENYLALANINE'), 48.910 -"PR5": ('a', 'r', 'M', None, "PURINE RIBOSIDE-5'-MONOPHOSPHATE"), 48.911 -"CDE": ('x', 'p', None, None, '1,2-DIMETHYL-PROPYLAMINE'), 48.912 -"SDP": ('s', 'p', 'M', None, '"2-AMINO-3-(DIETHOXY-PHOSPHORYLOXY)-PROPIONIC'), 48.913 -"GN7": ('g', 'd', 'M', None, '"N7-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.914 -"ARV": ('x', 'p', None, None, '5-N-ALLYL-ARGININE'), 48.915 -"2AT": ('t', 'd', 'M', None, "2'-O-ALLYL THYMIDINE-5'-MONOPHOSPHATE"), 48.916 -"T66": ('x', 'p', None, None, '2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-THYMINE'), 48.917 -"LCK": ('k', 'p', 'M', None, '(Z)-N~6~-(2-CARBOXY-1-METHYLETHYLIDENE)-L-LYSINE'), 48.918 -"EHP": ('f', 'p', 'M', None, '3-HYDROXYPHENYLALANINE'), 48.919 -"CHP": ('g', 'p', 'M', None, '3-CHLORO-4-HYDROXYPHENYLGLYCINE'), 48.920 -"HY3": ('p', 'p', 'M', None, '3-HYDROXYPROLINE'), 48.921 -"THX": ('n', 'd', None, None, 'PHOSPHONIC ACID 6-({6-[6-(6-CARBAMOYL-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONYL)-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDOCENE-2-CARBONYL]-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONL}-AMINO)-HEXYL ESTER 5-(5-METHYL-2,4-DIOXO-3,4-DIHYDRO-2H-PYRIMIDIN-1-YL)-TETRAHYDRO-FURAN-2-YLMETHYL ESTER'), 48.922 
-"LCH": ('n', 'd', None, None, '"[(1R,3R,4R,7S)-7-HYDROXY-3-(5-METHYLCYTOSIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL'), 48.923 -"4MF": ('n', 'd', None, None, '1-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-4-METHYL-1H-INDOLE'), 48.924 -"DTR": ('w', 'p', 'M', None, 'D-TRYPTOPHAN'), 48.925 -"MOD": ('x', 'p', None, None, '"L-METHIONYL'), 48.926 -"4MM": ('x', 'p', None, None, '(1S)-1-CARBOXY-N,N,N-TRIMETHYL-3-(METHYLSULFANYL)PROPAN-1-AMINIUM'), 48.927 -"ARO": ('r', 'p', 'M', None, '"C-GAMMA-HYDROXY'), 48.928 -"GSS": ('g', 'd', 'M', None, "2'-DEOXY-N2-(S)STYRENE OXIDE GUANOSINE MONOPHOSPHATE"), 48.929 -"DRZ": ('n', 'd', None, None, '"3\',4\'-DIHYDROXY-PENTANAL-5\'-PHOSPHATE"'), 48.930 -"CCY": ('x', 'p', None, None, '2-(1-AMINO-2-MERCAPTO-ETHYL)-5-(4-HYDROXY-BENZYL)-3-(ETHANOYL)-3,5-DIHYDRO-IMIDAZOL-4-ONE'), 48.931 -"TPQ": ('y', 'p', 'M', None, '5-(2-CARBOXY-2-AMINOETHYL)-2-HYDROXY-1,4-BENZOQUINONE'), 48.932 -"5CS": ('c', 'p', 'M', None, '"2-AMINO-3-(CYSTEIN-S-YL)-ISOXAZOLIDIN-5-YL-ACETIC'), 48.933 -"4OC": ('c', 'r', 'M', None, '"4N,O2\'-METHYLCYTIDINE-5\'-MONOPHOSPHATE"'), 48.934 -"DRP": ('n', 'd', None, None, '"2-DEOXYRIBOFURANOSYL-PYRIDINE-5\'-MONOPHOSPHATE"'), 48.935 -"BOR": ('r', 'p', 'M', None, '"(1R)-1-AMINO-4-{[(E)-AMINO(IMINO)METHYL]AMINO}BUTYLBORONIC'), 48.936 -"DRT": ('t', 'd', 'M', None, "2'-DEOXY-L-RIBO-FURANOSYL THYMINE-5'-MONOPHOSPHATE"), 48.937 -"CCS": ('c', 'p', 'M', None, '"CARBOXYMETHYLATED'), 48.938 -"TS": ('n', 'd', None, None, '"THYMIDINE-5\'-THIOPHOSPHATE"'), 48.939 -"MME": ('m', 'p', 'M', None, '"N-METHYL'), 48.940 -"CCL": ('k', 'p', 'M', None, 'N~6~-[(CYCLOPENTYLOXY)CARBONYL]-D-LYSINE'), 48.941 -"GSC": ('g', 'p', 'M', None, '"2-ETHYLTHIO'), 48.942 -"DRM": ('n', 'd', None, None, '"{[(1R,2S)-2-(2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)CYCLOPENTYL]OXY}METHYLPHOSPHONIC'), 48.943 -"4FB": ('p', 'p', 'M', None, '(4S)-4-FLUORO-L-PROLINE'), 48.944 -"GYS": ('x', 'p', None, None, 
'"[(4Z)-2-(1-AMINO-2-HYDROXYETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.945 -"NAL": ('a', 'p', 'M', None, 'BETA-(2-NAPHTHYL)-ALANINE'), 48.946 -"6MT": ('a', 'r', 'M', None, '"TRANS-N6-METHYL-DEOXY-ADENOSINE-5\'-MONOPHOSPHATE"'), 48.947 -"CCC": ('c', 'r', 'M', None, "CYTIDINE-5'-PHOSPHATE-2',3'-CYCLIC PHOSPHATE"), 48.948 -"UMP": ('u', 'r', 'M', None, "2'-DEOXYURIDINE 5'-MONOPHOSPHATE"), 48.949 -"ML3": ('k', 'p', 'M', None, '2-{[(2R)-2-AMINO-2-CARBOXYETHYL]SULFANYL}-N,N,N-TRIMETHYLETHANAMINIUM'), 48.950 -"LPD": ('p', 'p', 'M', None, 'L-PROLINAMIDE'), 48.951 -"OHI": ('h', 'p', 'M', None, '3-(2-OXO-2H-IMIDAZOL-4-YL)-L-ALANINE'), 48.952 -"LYR": ('k', 'p', 'M', None, 'N~6~-[(2Z,4E,6E,8E)-3,7-DIMETHYL-9-(2,6,6-TRIMETHYLCYCLOHEX-1-EN-1-YL)NONA-2,4,6,8-TETRAENYL]LYSINE'), 48.953 -"LYS": ('K', 'p', None, None, 'LYSINE'), 48.954 -"LYX": ('k', 'p', 'M', None, "N''-(2-COENZYME A)-PROPANOYL-LYSINE"), 48.955 -"CYA": ('c', 'p', 'M', None, '"TWO'), 48.956 -"LYZ": ('k', 'p', 'M', None, '5-HYDROXYLYSINE'), 48.957 -"ODP": ('n', 'r', None, None, '"4-OXO-NICOTINAMIDE-ADENINE'), 48.958 -"C3Y": ('c', 'p', 'M', None, 'S-[(1S)-1-HYDROXY-1-(HYDROXYAMINO)ETHYL]-L-CYSTEINE'), 48.959 -"POM": ('p', 'p', 'M', None, 'CIS-5-METHYL-4-OXOPROLINE'), 48.960 -"LYM": ('k', 'p', 'M', None, 'DEOXY-METHYL-LYSINE'), 48.961 -"LYN": ('k', 'p', 'M', None, '"2,6-DIAMINO-HEXANOIC'), 48.962 -"GLQ": ('e', 'p', 'M', None, '"4-AMINO-5-OXO-PENTANOIC'), 48.963 -"PIV": ('x', 'p', None, None, '"PIVALIC'), 48.964 -"OHS": ('d', 'p', 'M', None, 'O-(CARBOXYSULFANYL)-4-OXO-L-HOMOSERINE'), 48.965 -"X9A": ('x', 'p', None, None, 'amino[(3-{(2Z,5S,6S,9R,12S,13R,16R)-5,16-dicarboxy-2-ethylidene-12-[(1E,3E,5S,6R)-6-methoxy-3,5-dimethyl-7-phenylhepta-1,3-dien-1-yl]-1,6,13-trimethyl-3,7,10,14,19-pentaoxo-1,4,8,11,15-pentaazacyclononadecan-9-yl}propyl)amino]methaniminium'), 48.966 -"SMT": ('n', 'r', None, None, '"2\'-[(METHYLTHIO)ETHYLOXY]-THYMIDINE-5\'-MONOPHOSPHATE"'), 48.967 -"XAR": ('n', 'd', 
None, None, '"[(1R,4S,6S)-4-(6-AMINO-9H-PURIN-9-YL)-6-HYDROXYCYCLOHEX-2-EN-1-YL]METHYL'), 48.968 -"CYD": ('c', 'p', 'M', None, '"2-AMINO-6-(CYSTEIN-S-YL)-5-OXO-HEXANOIC'), 48.969 -"SVA": ('s', 'p', 'M', None, '"SERINE'), 48.970 -"5HT": ('t', 'd', 'M', None, '5-HYDROXY-THYMIDINE'), 48.971 -"5HU": ('n', 'd', None, None, '"5-HYDROXYMETHYLURIDINE-2\'-DEOXY-5\'-MONOPHOSPHATE"'), 48.972 -"XAL": ('a', 'd', 'M', None, '"[(1S,4R,6R)-6-HYDROXY-4-(ADENIN-9-YL)CYCLOHEX-2-EN-1-YL]METHYL'), 48.973 -"T": ('n', 'd', None, None, '"THYMIDINE-5\'-MONOPHOSPHATE"'), 48.974 -"5HP": ('e', 'p', 'M', None, '"PYROGLUTAMIC'), 48.975 -"4SC": ('c', 'd', 'M', None, "4'-THIO-2'-DEOXYCYTIDINE-5'-MONOPHOSPHATE GROUP"), 48.976 -"CQR": ('x', 'p', None, None, '"[(4Z)-2-(AMINOMETHYL)-4-(4-HYDROXYBENZYLIDENE)-5-OXO-4,5-DIHYDRO-1H-IMIDAZOL-1-YL]ACETIC'), 48.977 -"XAD": ('a', 'd', 'M', None, '9-(2,3-DIDEOXY-6-O-PHOSPHONO-BETA-D-ERYTHRO-HEXOPYRANOSYL)-9H-PURIN-6-AMINE'), 48.978 -"XAE": ('n', 'd', None, None, '3-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-3H-IMIDAZO[4,5-G]QUINAZOLIN-8-AMINE'), 48.979 -"TYI": ('y', 'p', 'M', None, '3,5-DIIODOTYROSINE'), 48.980 -"VLL": ('x', 'p', None, None, '"(2S)-2,3-DIAMINOBUTANOIC'), 48.981 -"2BD": ('n', 'd', None, None, "N1-(1-HYDROXY-3-BUTEN-2(S)-YL)-2'-DEOXYINOSINE MONO PHOSPHORIC ACID"), 48.982 -"IIC": ('x', 'p', None, None, '"4-IMIDAZOLMETHYLENE-5-IMIDAZOLONE'), 48.983 -"A66": ('x', 'p', None, None, '2-AMINOETHYLLYSINE-CARBONYLMETHYLENE-ADENINE'), 48.984 -"2EG": ('g', 'd', 'M', None, "2'-DEOXY-N-ETHYLGUANOSINE 5'-PHOSPHATE"), 48.985 -"2SA": ('n', 'r', None, None, '"2-[9-(3,4-DIHYDROXY-5-PHOSPHONOOXYMETHYL-TETRAHYDRO-FURAN-2-YL)-9H-PURIN-6-YLAMINO]-SUCCINIC'), 48.986 -"2CO": ('c', 'p', 'M', None, 'S-HYDROPEROXYCYSTEINE'), 48.987 -"TMD": ('t', 'p', 'M', None, '(6,7-DIHYDRO)4-[(E)-BUTENYL]-4,N-DIMETHYL-THREONINE'), 48.988 -"CXM": ('m', 'p', 'M', None, 'N-CARBOXYMETHIONINE'), 48.989 -"ALA": ('A', 'p', None, None, 'ALANINE'), 48.990 -"GOM": ('g', 'r', 'M', 
None, 'GLUTAMOL-AMP'), 48.991 -"2VA": ('v', 'p', 'M', None, '"2\'-(L-VALYL)AMINO-2\'-DEOXYADENOSINE"'), 48.992 -"5BU": ('u', 'r', 'M', None, '"5-BROMO-URIDINE-5\'-MONOPHOSPHATE"'), 48.993 -"IGU": ('g', 'd', 'M', None, '"2\'-DEOXYISOGUANINE-5\'-MONOPHOSPHATE"'), 48.994 -"MLE": ('l', 'p', 'M', None, 'N-METHYLLEUCINE'), 48.995 -"T39": ('n', 'r', None, None, "2'-O-METHOXYETHYLENE THYMIDINE 5'-MONOPHOSPHATE"), 48.996 -"FLT": ('y', 'p', 'M', None, '"FLUOROMALONYL'), 48.997 -"U37": ('u', 'r', 'M', None, "URIDINE 5'-MONOTHIOPHOSPHATE"), 48.998 -"U36": ('u', 'r', 'M', None, '"2\'-O-METHYOXYETHYL-URIDINE-5\'-MONOPHOSPHATE"'), 48.999 -"DHL": ('x', 'p', None, None, '2-AMINO-ETHANETHIOL'), 48.1000 -"U33": ('n', 'd', None, None, "5-BROMO-2'-DEOXY URIDINE"), 48.1001 -"TIH": ('a', 'p', 'M', None, 'BETA(2-THIENYL)ALANINE'), 48.1002 -"SMF": ('f', 'p', 'M', None, '4-SULFOMETHYL-L-PHENYLALANINE'), 48.1003 -"FLA": ('a', 'p', 'M', None, 'TRIFLUOROALANINE'), 48.1004 -"HOB": ('n', 'd', None, None, '"CHOLEST-5-EN-3-YL'), 48.1005 -"FLE": ('l', 'p', 'M', None, 'FUROYL-LEUCINE'), 48.1006 -"VDL": ('x', 'p', None, None, '"(2R,3R)-2,3-DIAMINOBUTANOIC'), 48.1007 -"A1P": ('n', 'd', None, None, '9-{2-DEOXY-5-O-[HYDROXY(OXIDO)PHOSPHINO]-BETA-L-ERYTHRO-PENTOFURANOSYL}-9H-PURIN-2-AMINE'), 48.1008 -"0G6": ('x', 'p', None, None, 'D-PHENYLALANYL-N-[(1S)-4-{[AMINO(IMINIO)METHYL]AMINO}-1-(CHLOROACETYL)BUTYL]-L-PROLINAMIDE'), 48.1009 -"LLY": ('k', 'p', 'M', None, 'NZ-(DICARBOXYMETHYL)LYSINE'), 48.1010 -"DHI": ('h', 'p', 'M', None, 'D-HISTIDINE'), 48.1011 -"RON": ('x', 'p', None, None, 'NORVALINE'), 48.1012 -"PTH": ('y', 'p', 'M', None, 'CE1-METHYLENE-HYDROXY-PHOSPHOTYROSINE'), 48.1013 -"T32": ('t', 'd', 'M', None, "6'-ALPHA-METHYL CARBOCYCLIC THYMIDINE 5'-MONOPHOSPHATE"), 48.1014 -"LLP": ('k', 'p', 'M', None, '2-LYSINE(3-HYDROXY-2-METHYL-5-PHOSPHONOOXYMETHYL-PYRIDIN-4-YLMETHANE)'), 48.1015 -"MNU": ('u', 'r', 'M', None, 
'"(2R,4S)-1-[(4R)-3,4-DIHYDROXYTETRAHYDROFURAN-2-YL]-5-[(METHYLAMINO)METHYL]-1,2,3,4-TETRAHYDROPYRIMIDINE-2,4-DIOL-5\'-MONOPHOSPHATE"'), 48.1016 -"PTA": ('x', 'p', None, None, '"[(1-AMINO-3-METHYL-BUTYL)-HYDROXY-PHOSPHINOYL]-ACETIC'), 48.1017 -"TZO": ('x', 'p', None, None, '"1,3-THIAZOLE-4-CARBOXYLIC'), 48.1018 -"DBZ": ('a', 'p', 'M', None, '3-(BENZOYLAMINO)-L-ALANINE'), 48.1019 -"MNL": ('l', 'p', 'M', None, '4,N-DIMETHYLNORLEUCINE'), 48.1020 -"APN": ('a', 'd', 'M', None, '2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-ADENINE'), 48.1021 -"OCY": ('c', 'p', 'M', None, 'HYDROXYETHYLCYSTEINE'), 48.1022 -"BCS": ('c', 'p', 'M', None, 'BENZYLCYSTEINE'), 48.1023 -"OSE": ('s', 'p', 'M', None, 'O-SULFO-L-SERINE'), 48.1024 -"3TY": ('x', 'p', None, None, '3-[(3E)-3-(BENZYLHYDRAZONO)-4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIEN-1-YL]-L-ALANINE'), 48.1025 -"SCS": ('c', 'p', 'M', None, '3-(ETHYLDISULFANYL)-L-ALANINE'), 48.1026 -"TYY": ('y', 'p', 'M', None, '3-(4-HYDROXY-3-IMINO-6-OXO-CYCLOHEXA-1,4-DIENYL)-ALANINE'), 48.1027 -"0A5": ('n', 'p', 'M', None, 'N~2~-PROPANOYL-L-ASPARAGINE'), 48.1028 -"PBT": ('n', 'd', None, None, '"[3-HYDROXY-5-(5-METHYL-2,4-DIOXOTETRAHYDRO-1(2H)-PYRIMIDINYL)TETRAHYDRO-2-FURANYL]METHYL'), 48.1029 -"CWR": ('s', 'p', 'M', None, '"(4-METHYL-5-OXO-2,5-DIHYDRO-1H-IMIDAZOL-1-YL)ACETIC'), 48.1030 -"TRO": ('w', 'p', 'M', None, '2-HYDROXY-TRYPTOPHAN'), 48.1031 -"TRN": ('w', 'p', 'M', None, 'NZ2-TRYPTOPHAN'), 48.1032 -"PYO": ('u', 'r', 'M', None, '"1-(BETA-D-RIBOFURANOSYL)-PYRIMIDIN-2-ONE-5\'-PHOSPHATE"'), 48.1033 -"ZGU": ('g', 'r', 'M', None, '"(S)-1\'-(2\',3\'-DIHYDROXYPROPYL)-GUANINE"'), 48.1034 -"TRG": ('k', 'p', 'M', None, '"L-(N,N'), 48.1035 -"TRF": ('w', 'p', 'M', None, 'N1-FORMYL-TRYPTOPHAN'), 48.1036 -"R1F": ('c', 'p', 'M', None, '3-{[(2,2,5,5-TETRAMETHYL-1-OXO-4-PHENYL-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-D-ALANINE'), 48.1037 -"M5M": ('c', 'r', 'M', None, '"2\'-(N-ACETAMIDE)-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.1038 -"PBF": ('f', 'p', 'M', None, 
'PARA-(BENZOYL)-PHENYLALANINE'), 48.1039 -"R1B": ('c', 'p', 'M', None, '3-{[(2,2,4,5,5-PENTAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-L-ALANINE'), 48.1040 -"DHA": ('a', 'p', 'M', None, '"2-AMINO-ACRYLIC'), 48.1041 -"YCM": ('c', 'p', 'M', None, 'S-(2-AMINO-2-OXOETHYL)-L-CYSTEINE'), 48.1042 -"R1A": ('c', 'p', 'M', None, '3-{[(2,2,5,5-TETRAMETHYL-1-OXO-2,5-DIHYDRO-1H-PYRROLIUM-3-YL)METHYL]DISULFANYL}-D-ALANINE'), 48.1043 -"TRQ": ('w', 'p', 'M', None, '"2-AMINO-3-(6,7-DIOXO-6,7-DIHYDRO-1H-INDOL-3-YL)-PROPIONIC'), 48.1044 -"TRP": ('W', 'p', None, None, 'TRYPTOPHAN'), 48.1045 -"T23": ('n', 'r', None, None, '"2\'-O-METHYL-3\'-METHYL-3\'-DEOXY-ARABINOFURANOSYL-THYMINE-5\'-PHOSPHATE"'), 48.1046 -"TRW": ('w', 'p', 'M', None, '"TRW3-(2-AMINO-3-HYDROXY-PROPYL)-6-(N\'-CYCLOHEXYL-HYDRAZINO)OCTAHYDRO-INDOL-7-OL"'), 48.1047 -"4FW": ('w', 'p', 'M', None, '4-FLUOROTRYPTOPHANE'), 48.1048 -"IVA": ('x', 'p', None, None, '"ISOVALERIC'), 48.1049 -"VOL": ('x', 'p', None, None, 'L-VALINOL'), 48.1050 -"6HT": ('t', 'd', 'M', None, '"1\',5\'-ANHYDRO-2\',3\'-DIDEOXY-2\'-(THYMIN-1-YL)-6\'-O-PHOSPHORYL-D-ARABINO-HEXITOL"'), 48.1051 -"6HG": ('g', 'd', 'M', None, '"1\',5\'-ANHYDRO-2\',3\'-DIDEOXY-2\'-(GUANIN-9-YL)-6\'-O-PHOSPHORYL-D-ARABINO-HEXITOL"'), 48.1052 -"MPJ": ('x', 'p', None, None, '"(1-AMINO-3-METHYLSULFANYL-PROPYL)-PHOSPHINIC'), 48.1053 -"C43": ('c', 'r', 'M', None, '"2\'-O-METHYOXYETHYL-CYTIDINE-5\'-MONOPHOSPHATE"'), 48.1054 -"MPH": ('x', 'p', None, None, '"(1-AMINO-3-METHYLSULFANYL-PROPYL)-PHOSPHONIC'), 48.1055 -"AVN": ('x', 'p', None, None, '"(2S)-AMINO[(5S)-3-CHLORO-4,5-DIHYDROISOXAZOL-5-YL]ACETIC'), 48.1056 -"M3L": ('k', 'p', 'M', None, 'N-TRIMETHYLLYSINE'), 48.1057 -"C5C": ('c', 'p', 'M', None, '"S-CYCLOPENTYL'), 48.1058 -"HTR": ('w', 'p', 'M', None, 'BETA-HYDROXYTRYPTOPHANE'), 48.1059 -"MPQ": ('g', 'p', 'M', None, 'N-METHYL-ALPHA-PHENYL-GLYCINE'), 48.1060 -"ESC": ('m', 'p', 'M', None, '"2-AMINO-4-ETHYL'), 48.1061 -"3ME": ('n', 'd', None, None, '"PHOSPHORIC'), 
48.1062 -"TYS": ('y', 'p', 'M', None, 'O-SULFO-L-TYROSINE'), 48.1063 -"6HA": ('a', 'd', 'M', None, '"1\',5\'-ANHYDRO-2\',3\'-DIDEOXY-2\'-(ADENIN-9-YL)-6\'-O-PHOSPHORYL-D-ARABINO-HEXITOL"'), 48.1064 -"KCX": ('k', 'p', 'M', None, '"LYSINE'), 48.1065 -"6HC": ('c', 'd', 'M', None, '"1\',5\'-ANHYDRO-2\',3\'-DIDEOXY-2\'-(CYTOSIN-1-YL)-6\'-O-PHOSPHORYL-D-ARABINO-HEXITOL"'), 48.1066 -"AVC": ('a', 'r', 'M', None, '"ADENOSINE-5\'-MONOPHOSPHATE-2\',3\'-VANADATE"'), 48.1067 -"GTP": ('g', 'r', 'M', None, '"GUANOSINE-5\'-TRIPHOSPHATE"'), 48.1068 -"ACB": ('d', 'p', 'M', None, '"3-METHYL-ASPARTIC'), 48.1069 -"2SI": ('x', 'p', None, None, '"2-O-SULFO-A-L-IDURONIC'), 48.1070 -"NEM": ('h', 'p', 'M', None, '"NE2-METHYLATED'), 48.1071 -"N7P": ('p', 'p', 'M', None, '1-ACETYL-D-PROLINE'), 48.1072 -"SUB": ('x', 'p', None, None, '"3-PHENYL-2-{[4-(TOLUENE-4-SULFONYL)-THIOMORPHOLINE-3-CARBONYL]-AMINO}-PROPIONIC'), 48.1073 -"2AG": ('g', 'p', 'M', None, '"(2S)-2-AMINOPENT-4-ENOIC'), 48.1074 -"SUN": ('s', 'p', 'M', None, 'O-[(R)-(DIMETHYLAMINO)(ETHOXY)PHOSPHORYL]-L-SERINE'), 48.1075 -"SUI": ('x', 'p', None, None, '"(3-AMINO-2,5-DIOXO-1-PYRROLIDINYL)ACETIC'), 48.1076 -"ACL": ('r', 'p', 'M', None, 'DEOXY-CHLOROMETHYL-ARGININE'), 48.1077 -"SUR": ('u', 'r', 'M', None, '"1-(BETA-D-RIBOFURANOSYL)-2-THIO-URACIL-5\'-PHOSPHATE"'), 48.1078 -"BVP": ('n', 'd', None, None, '"(E)-5-(2-BROMOVINYL)-2\'-DEOXYURIDINE-5\'-MONOPHOSPHATE"'), 48.1079 -"HFA": ('x', 'p', None, None, '"ALPHA-HYDROXY-BETA-PHENYL-PROPIONIC'), 48.1080 -"LET": ('x', 'p', None, None, '(Z)-N^6-{3-CARBOXY-1-[(4-CARBOXY-2-OXOBUTOXY)METHYL]PROPYLIDENE}-L-LYSINE'), 48.1081 -"0A8": ('c', 'p', 'M', None, 'S-[(2-CHLOROETHYL)CARBAMOYL]-L-CYSTEINE'), 48.1082 -"D4P": ('x', 'p', None, None, '"(2S)-AMINO(4-HYDROXYPHENYL)ACETIC'), 48.1083 -"2OT": ('t', 'd', 'M', None, "2'-O-[2-(N,N-DIMETHYLAMINOOXY)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"), 48.1084 -"NEP": ('h', 'p', 'M', None, 'N1-PHOSPHONOHISTIDINE'), 48.1085 -"PMT": ('c', 'r', 'M', None, '"PHOSPHORIC'), 
48.1086 -"2DT": ('t', 'd', 'M', None, '"3\'-DEOXYTHYMIDINE-5\'-MONOPHOSPHATE"'), 48.1087 -"MLY": ('k', 'p', 'M', None, 'N-DIMETHYL-LYSINE'), 48.1088 -"AD2": ('a', 'd', 'M', None, '"2\'-DEOXY-ADENOSINE-3\'-5\'-DIPHOSPHATE"'), 48.1089 -"CGA": ('e', 'p', 'M', None, '"CARBOXYMETHYLATED'), 48.1090 -"NBQ": ('y', 'p', 'M', None, '2-HYDROXY-5-({1-[(2-NAPHTHYLOXY)METHYL]-3-OXOPROP-1-ENYL}AMINO)TYROSINE'), 48.1091 -"ODS": ('x', 'p', None, None, '"4-METHYLPIPERAZIN-1-YL'), 48.1092 -"7GU": ('g', 'd', 'M', None, '"7-DEAZA-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'), 48.1093 -"SAH": ('c', 'p', 'M', None, 'S-ADENOSYL-L-HOMOCYSTEINE'), 48.1094 -"2DF": ('n', 'd', None, None, '"N-(2-DEOXY-BETA-D-ERYTHO-PENTOFURANOSYL-5-PHOSPHATE)'), 48.1095 -"2DA": ('a', 'd', 'M', None, '"2\',3\'-DIDEOXYADENOSINE-5\'-MONOPHOSPHATE"'), 48.1096 -"PCC": ('p', 'p', 'M', None, '5-OXOPROLINE'), 48.1097 -"ODA": ('x', 'p', None, None, '"9-AMINO-6,10-DIOXO-OCTAHYDRO-PYRIDAZINO[1,2-A][1,2]DIAZEPINE-1-CARBOXYLIC'), 48.1098 -"2DO": ('x', 'p', None, None, '(2S)-2-AMINOHEXANE-1,1-DIOL'), 48.1099 -"CGU": ('e', 'p', 'M', None, '"GAMMA-CARBOXY-GLUTAMIC'), 48.1100 -"L2A": ('x', 'p', None, None, '"(2S,5S,8S,11S,15E,20S)-20-AMINO-2-(CARBOXYMETHYL)-11,20-DIMETHYL-5,8-BIS(2-METHYLPROPYL)-3,6,9,21-TETRAOXO-1,4,7,10-TETRAAZACYCLOHENICOS-15-ENE-11-CARBOXYLIC'), 48.1101 -"DNG": ('l', 'p', 'M', None, 'N-FORMYL-D-NORLEUCINE'), 48.1102 -"UMA": ('a', 'p', 'M', None, '"URIDINE-5\'-DIPHOSPHATE-N-ACETYLMURAMOYL-L-ALANINE"'), 48.1103 -"PM3": ('f', 'p', 'M', None, '"2-AMINO-3-(4-PHOSPHONOMETHYL-PHENYL)-PROPIONIC'), 48.1104 -"DNM": ('l', 'p', 'M', None, 'N-METHYL-D-NORLEUCINE'), 48.1105 -"DNL": ('k', 'p', 'M', None, '6-AMINO-HEXANAL'), 48.1106 -"GMA": ('e', 'p', 'M', None, '"4-AMIDO-4-CARBAMOYL-BUTYRIC'), 48.1107 -"1LU": ('l', 'p', 'M', None, '"4-METHYL-PENTANOIC'), 48.1108 -"CDV": ('x', 'p', None, None, '"3-METHYL-2-UREIDO-BUTYRIC'), 48.1109 -"ADX": ('n', 'r', None, None, '"ADENOSINE-5\'-PHOSPHOSULFATE"'), 48.1110 -"LKC": ('n', 'd', 
None, None, '4-AMINO-1-[(1S,3R,4R,7S)-7-HYDROXY-1-(HYDROXYMETHYL)-2,5-DIOXABICYCLO[2.2.1]HEPT-3-YL]-5-METHYLPYRIMIDIN-2(1H)-ONE'), 48.1111 -"ADD": ('x', 'p', None, None, '"2,6,8-TRIMETHYL-3-AMINO-9-BENZYL-9-METHOXYNONANOIC'), 48.1112 -"CUC": ('x', 'p', None, None, '"CYCLOHEXYLGLYCYL'), 48.1113 -"DNP": ('a', 'p', 'M', None, '3-AMINO-ALANINE'), 48.1114 -"DNS": ('k', 'p', 'M', None, 'N~6~-{[5-(DIMETHYLAMINO)-1-NAPHTHYL]SULFONYL}-L-LYSINE'), 48.1115 -"DNR": ('c', 'd', 'M', None, "2'-DEOXY-N3-PROTONATED CYTIDINE-5'-MONOPHOSPHATE"), 48.1116 -"CG1": ('g', 'r', 'M', None, '"5\'-O-[(R)-HYDROXY(METHOXY)PHOSPHORYL]GUANOSINE"'), 48.1117 -"Z": ('c', 'd', 'M', None, 'ZEBULARINE'), 48.1118 -"GMU": ('n', 'd', None, None, "2'-O-[(2-GUANIDINIUM)ETHYL]-5-METHYLURIDINE 5'-MONOPHOSPHATE"), 48.1119 -"C99": ('x', 'p', None, None, '"{(2R)-2-[(1S,2R)-1-AMINO-2-HYDROXYPROPYL]-2-HYDROXY-4,5-DIOXOIMIDAZOLIDIN-1-YL}ACETIC'), 48.1120 -"GDP": ('g', 'r', 'M', None, '"GUANOSINE-5\'-DIPHOSPHATE"'), 48.1121 -"MED": ('m', 'p', 'M', None, 'D-METHIONINE'), 48.1122 -"GDR": ('g', 'd', 'M', None, '"GUANOSINE-5\'-DIPHOSPHATE-RHAMNOSE"'), 48.1123 -"MEA": ('f', 'p', 'M', None, 'N-METHYLPHENYLALANINE'), 48.1124 -"LEF": ('l', 'p', 'M', None, '(4S)-5-FLUORO-L-LEUCINE'), 48.1125 -"LEH": ('l', 'p', 'M', None, 'N-[12-(1H-IMIDAZOL-1-YL)DODECANOYL]-L-LEUCINE'), 48.1126 -"CHG": ('x', 'p', None, None, '"CYCLOHEXYL-GLYCINE"'), 48.1127 -"MEN": ('n', 'p', 'M', None, '"N-METHYL'), 48.1128 -"G42": ('g', 'd', 'M', None, '"8-OXO-2\'-DEOXY-GUANOSINE-5\'-MONOPHOSPHATE"'), 48.1129 -"5CG": ('g', 'd', 'M', None, '"5\'-CHLORO-5\'-DEOXY-GUANOSINE"'), 48.1130 -"T31": ('u', 'r', 'M', None, "THYMIDINE 5'-MONOTHIOPHOSPHATE"), 48.1131 -"MEU": ('g', 'p', 'M', None, 'O-METHYL-GLYCINE'), 48.1132 -"MET": ('M', 'p', None, None, 'METHIONINE'), 48.1133 -"U34": ('u', 'r', 'M', None, "URIDINE 5'-MONOMETHYLPHOSPHATE"), 48.1134 -"MEQ": ('q', 'p', 'M', None, 'N5-METHYLGLUTAMINE'), 48.1135 -"MEP": ('u', 'r', 'M', None, '"PHOSPHORIC'), 48.1136 
-"B1P": ('n', 'd', None, None, '2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSE'), 48.1137 -"IG": ('g', 'r', 'M', None, '"ISOGUANOSINE-5\'-MONOPHOSPHATE"'), 48.1138 -"T2S": ('n', 'r', None, None, '"THYMIDINE-5\'-DITHIOPHOSPHORATE"'), 48.1139 -"ZHP": ('n', 'r', None, None, '"(S)-1\'-(2\',3\'-DIHYDROXYPROPYL)-HYDROXYPYRIDONE"'), 48.1140 -"AZK": ('k', 'p', 'M', None, '"(S)-2-AMINO-6-AZIDOHEXANOIC'), 48.1141 -"U31": ('u', 'r', 'M', None, "2'-O-3-AMINOPROPYL 2'-DEOXYURIDINE-5'-MONOPHOSPHATE"), 48.1142 -"LTR": ('w', 'p', 'M', None, 'L-TRYPTOPHAN'), 48.1143 -"MSU": ('x', 'p', None, None, '"SUCCINIC'), 48.1144 -"1PA": ('f', 'p', 'M', None, '"PHENYLMETHYLACETIC'), 48.1145 -"MSP": ('a', 'p', 'M', None, '"5\'-O-[(L-METHIONYL)-SULPHAMOYL]ADENOSINE"'), 48.1146 -"MSO": ('m', 'p', 'M', None, '"SELENOMETHIONINE'), 48.1147 -"AZY": ('y', 'p', 'M', None, '3-AZIDO-L-TYROSINE'), 48.1148 -"ENA": ('n', 'r', None, None, 'ETHENO-NAD'), 48.1149 -"MSL": ('m', 'p', 'M', None, '"(2S)-2-AMINO-4-(METHYLSULFONIMIDOYL)BUTANOIC'), 48.1150 -"PF5": ('f', 'p', 'M', None, '2,3,4,5,6-PENTAFLUORO-L-PHENYLALANINE'), 48.1151 -"1PR": ('n', 'r', None, None, '"PHOSPHORIC'), 48.1152 -"3AR": ('x', 'p', None, None, 'N-OMEGA-PROPYL-L-ARGININE'), 48.1153 -"MSE": ('m', 'p', 'M', None, 'SELENOMETHIONINE'), 48.1154 -"AZS": ('s', 'p', 'M', None, 'O-DIAZOACETYL-L-SERINE'), 48.1155 -"MSA": ('g', 'p', 'M', None, '"(2-S-METHYL)'), 48.1156 -"R2P": ('x', 'p', None, None, '"(2R)-2-AMINO-3-(4-HYDROXY-1,2,5-THIADIAZOL-3-YL)PROPANOIC'), 48.1157 -"CAR": ('c', 'd', 'M', None, "CYTOSINE ARABINOSE-5'-PHOSPHATE"), 48.1158 -"DAB": ('a', 'p', 'M', None, '"2,4-DIAMINOBUTYRIC'), 48.1159 -"OTY": ('y', 'p', 'M', None, '"2-AMINO-3-(4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIENYL)PROPANOIC'), 48.1160 -"DAL": ('a', 'p', 'M', None, 'D-ALANINE'), 48.1161 -"DAM": ('x', 'p', None, None, 'N-METHYL-ALPHA-BETA-DEHYDROALANINE'), 48.1162 -"TT": ('n', 'd', None, None, 
'"[(1R,3R,4S,9R,10S,12R,15AS,15BR,18BR,18CS)-10-HYDROXY-15A,15B-DIMETHYL-13,15,16,18-TETRAOXOHEXADECAHYDRO-8H-9,12-EPOXY-1,4-METHANO-2,5,7-TRIOXA-12A,14,17,18A-TETRAAZACYCLOHEXADECA[1,2,3,4-DEF]BIPHENYLEN-3-YL]METHYL DIHYDROGEN PHOSPHATE"'), 48.1163 -"IOY": ('f', 'p', 'M', None, 'P-IODO-D-PHENYLALANINE'), 48.1164 -"DAH": ('f', 'p', 'M', None, '3,4-DIHYDROXYPHENYLALANINE'), 48.1165 -"8MG": ('g', 'd', 'M', None, '"8-METHYL-2\'-DEOXYGUANOSINE-5\'-MONOPHOSPHATE"'), 48.1166 -"BZG": ('n', 'd', None, None, '6-(BENZYLOXY)-9-(2-DEOXY-5-O-PHOSPHONO-BETA-D-ERYTHRO-PENTOFURANOSYL)-9H-PURIN-2-AMINE'), 48.1167 -"ARM": ('r', 'p', 'M', None, 'DEOXY-METHYL-ARGININE'), 48.1168 -"DAR": ('r', 'p', 'M', None, 'D-ARGININE'), 48.1169 -"DAS": ('d', 'p', 'M', None, '"D-ASPARTIC'), 48.1170 -"MCY": ('c', 'd', 'M', None, '"5-METHYL-2\'-DEOXYCYTIDINE"'), 48.1171 -"SGB": ('s', 'p', 'M', None, 'O-[(S)-METHYL(1-METHYLETHOXY)PHOSPHORYL]-L-SERINE'), 48.1172 -"YRR": ('n', 'd', None, None, '"3-HYDROXY-PYRROLIDIN-2-YLMETHYL-MONOPHOSPHATE'), 48.1173 -"NF2": ('n', 'r', None, None, '(1S)-1,4-ANHYDRO-1-(2,4-DIFLUORO-5-METHYLPHENYL)-5-O-PHOSPHONO-D-RIBITOL'), 48.1174 -"CNU": ('u', 'r', 'M', None, '"5-CYANO-URIDINE-5\'-MONOPHOSPHATE"'), 48.1175 -"E": ('a', 'd', 'M', None, "N-((-)-(7S,8R,9S,10R)[7,8,9-TRIHYDROXY-7,8,9,10-TETRA HYDROBENZO[A]PYREN-10-YL])-2'-DEOXY-ADENOSINE-5'-MONOPHOSPHATE"), 48.1176 -"AA3": ('a', 'p', 'M', None, '"2-AMINOBUTYRIC'), 48.1177 -"GVL": ('x', 'p', None, None, 'O-[(R)-{[(3R)-4-AMINO-3-HYDROXY-2,2-DIMETHYL-4-OXOBUTYL]OXY}(HYDROXY)PHOSPHORYL]-L-SERINE'), 48.1178 -"TNR": ('s', 'p', 'M', None, 'O-(2-ACETAMIDO-2-DEOXY-ALPHA-D-GALACTOPYRANOSYL)-L-SERINE'), 48.1179 -"HOL": ('n', 'd', None, None, '"CHOLEST-5-EN-3-YL'), 48.1180 -"PTM": ('y', 'p', 'M', None, 'ALPHA-METHYL-O-PHOSPHOTYROSINE'), 48.1181 -"AS": ('a', 'd', 'M', None, "2-DEOXY-ADENOSINE -5'-THIO-MONOPHOSPHATE"), 48.1182 -"U": ('U', 'r', None, None, '"URIDINE-5\'-MONOPHOSPHATE"'), 48.1183 -"ESB": ('y', 'p', 'M', None, 
'3-[(3E)-3-(ETHYLIMINO)-4-HYDROXY-6-OXOCYCLOHEXA-1,4-DIEN-1-YL]-L-ALANINE'), 48.1184 -"DA2": ('x', 'p', None, None, 'NG,NG-DIMETHYL-L-ARGININE'), 48.1185 -"TNB": ('c', 'p', 'M', None, 'S-(2,3,6-TRINITROPHENYL)CYSTEINE'), 48.1186 -"RMP": ('a', 'd', 'M', None, '"2\'-DEOXY-ADENOSINE-5\'-RP-MONOMETHYLPHOSPHONATE"'), 48.1187 -"SME": ('m', 'p', 'M', None, '"METHIONINE'), 48.1188 -"NDF": ('f', 'p', 'M', None, 'N-(CARBOXYCARBONYL)-D-PHENYLALANINE'), 48.1189 -"BOE": ('t', 'd', 'M', None, "2'-O-[2-(BENZYLOXY)ETHYL] THYMIDINE-5'-MONOPHOSPHATE"), 48.1190 -"LGP": ('g', 'd', 'M', None, '"N9-1-HYDROXY-PROP-2-OXYMETHYL-GUANINE-3\'-MONOPHOSPHATE"'), 48.1191 -"SCH": ('c', 'p', 'M', None, 'S-METHYL-THIO-CYSTEINE'), 48.1192 -"T2T": ('n', 'd', None, None, '[(2S,3S,5R)-3-[(2S)-3-({[(2R,3S,4R,5R)-3-HYDROXY-4-METHOXY-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)TETRAHYDROFURAN-2-YL]METHYL}AMINO)-2-METHYL-3-OXOPROPYL]-5-(5-METHYL-2,4-DIOXO-3,4-DIHYDROPYRIMIDIN-1(2H)-YL)TETRAHYDROFURAN-2-YL]METHYL DIHYDROGEN PHOSPHATE'), 48.1193 -"PFF": ('f', 'p', 'M', None, '4-FLUORO-L-PHENYLALANINE'), 48.1194 -"PCE": ('x', 'p', None, None, '"2-(3-AMINO-4-CYCLOHEXYL-2-HYDROXY-BUTYL)-PENT-4-YNOIC'), 48.1195 -"DFO": ('x', 'p', None, None, '2,2-DIFLUORO-3-HYDROSTATINE'), 48.1196 -"DXN": ('n', 'd', None, None, '(1R,3S,4R)-4-(PHOSPHOOXYMETHYL)-CYCLOPENTANE-1,3-DIOL'), 48.1197 -"KGC": ('k', 'p', 'M', None, 'N~6~-[(2R)-2-CARBOXY-5-OXOTETRAHYDROFURAN-2-YL]-L-LYSINE'), 48.1198 -"US1": ('n', 'd', None, None, "2'-DEOXY-3'-THIOURIDINE 5'-(DIHYDROGEN PHOSPHATE)"), 48.1199 -"HHK": ('x', 'p', None, None, '"(2S)-2,8-DIAMINOOCTANOIC'), 48.1200 -"DXD": ('n', 'd', None, None, '(1S,3S,4R)-4-(PHOSPHOOXYMETHYL)-CYCLOPENTANE-1,3-DIOL'), 48.1201 -"ZCY": ('c', 'r', 'M', None, '"(S)-1\'-(2\',3\'-DIHYDROXYPROPYL)-CYTOSINE"'), 48.1202 -"IEL": ('k', 'p', 'M', None, 'N~6~-[(1Z)-ETHANIMIDOYL]-L-LYSINE'), 48.1203 -}
49.1 --- a/lib/allpy_data/__init__.py Mon Jan 24 21:40:10 2011 +0300 49.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 49.3 @@ -1,3 +0,0 @@ 49.4 -""" 49.5 -Module that contains various data relevant to biological sequences. 49.6 -"""
50.1 --- a/lib/allpy_data/amino_acids.py Mon Jan 24 21:40:10 2011 +0300 50.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 50.3 @@ -1,8 +0,0 @@ 50.4 - 50.5 -amino_acids = [ 50.6 - 'A', 'C', 'D', 'E', 'F', 50.7 - 'G', 'H', 'I', 'K', 'L', 50.8 - 'M', 'N', 'P', 'Q', 'R', 50.9 - 'S', 'T', 'V', 'W', 'Y', 50.10 -] 50.11 -
51.1 --- a/lib/allpy_data/blossum62.py Mon Jan 24 21:40:10 2011 +0300 51.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 51.3 @@ -1,28 +0,0 @@ 51.4 -matrix = { 51.5 -"A": {"A": 4, "R": -1, "N": -2, "D": -2, "C": 0, "Q": -1, "E": -1, "G": 0, "H": -2, "I": -1, "L": -1, "K": -1, "M": -1, "F": -2, "P": -1, "S": 1, "T": 0, "W": -3, "Y": -2, "V": 0, "B": -2, "Z": -1, "X": 0, "*": -4}, 51.6 -"R": {"A": -1, "R": 5, "N": 0, "D": -2, "C": -3, "Q": 1, "E": 0, "G": -2, "H": 0, "I": -3, "L": -2, "K": 2, "M": -1, "F": -3, "P": -2, "S": -1, "T": -1, "W": -3, "Y": -2, "V": -3, "B": -1, "Z": 0, "X": -1, "*": -4}, 51.7 -"N": {"A": -2, "R": 0, "N": 6, "D": 1, "C": -3, "Q": 0, "E": 0, "G": 0, "H": 1, "I": -3, "L": -3, "K": 0, "M": -2, "F": -3, "P": -2, "S": 1, "T": 0, "W": -4, "Y": -2, "V": -3, "B": 3, "Z": 0, "X": -1, "*": -4}, 51.8 -"D": {"A": -2, "R": -2, "N": 1, "D": 6, "C": -3, "Q": 0, "E": 2, "G": -1, "H": -1, "I": -3, "L": -4, "K": -1, "M": -3, "F": -3, "P": -1, "S": 0, "T": -1, "W": -4, "Y": -3, "V": -3, "B": 4, "Z": 1, "X": -1, "*": -4}, 51.9 -"C": {"A": 0, "R": -3, "N": -3, "D": -3, "C": 9, "Q": -3, "E": -4, "G": -3, "H": -3, "I": -1, "L": -1, "K": -3, "M": -1, "F": -2, "P": -3, "S": -1, "T": -1, "W": -2, "Y": -2, "V": -1, "B": -3, "Z": -3, "X": -2, "*": -4}, 51.10 -"Q": {"A": -1, "R": 1, "N": 0, "D": 0, "C": -3, "Q": 5, "E": 2, "G": -2, "H": 0, "I": -3, "L": -2, "K": 1, "M": 0, "F": -3, "P": -1, "S": 0, "T": -1, "W": -2, "Y": -1, "V": -2, "B": 0, "Z": 3, "X": -1, "*": -4}, 51.11 -"E": {"A": -1, "R": 0, "N": 0, "D": 2, "C": -4, "Q": 2, "E": 5, "G": -2, "H": 0, "I": -3, "L": -3, "K": 1, "M": -2, "F": -3, "P": -1, "S": 0, "T": -1, "W": -3, "Y": -2, "V": -2, "B": 1, "Z": 4, "X": -1, "*": -4}, 51.12 -"G": {"A": 0, "R": -2, "N": 0, "D": -1, "C": -3, "Q": -2, "E": -2, "G": 6, "H": -2, "I": -4, "L": -4, "K": -2, "M": -3, "F": -3, "P": -2, "S": 0, "T": -2, "W": -2, "Y": -3, "V": -3, "B": -1, "Z": -2, "X": -1, "*": -4}, 51.13 -"H": {"A": -2, "R": 0, "N": 1, "D": -1, "C": -3, "Q": 0, 
"E": 0, "G": -2, "H": 8, "I": -3, "L": -3, "K": -1, "M": -2, "F": -1, "P": -2, "S": -1, "T": -2, "W": -2, "Y": 2, "V": -3, "B": 0, "Z": 0, "X": -1, "*": -4}, 51.14 -"I": {"A": -1, "R": -3, "N": -3, "D": -3, "C": -1, "Q": -3, "E": -3, "G": -4, "H": -3, "I": 4, "L": 2, "K": -3, "M": 1, "F": 0, "P": -3, "S": -2, "T": -1, "W": -3, "Y": -1, "V": 3, "B": -3, "Z": -3, "X": -1, "*": -4}, 51.15 -"L": {"A": -1, "R": -2, "N": -3, "D": -4, "C": -1, "Q": -2, "E": -3, "G": -4, "H": -3, "I": 2, "L": 4, "K": -2, "M": 2, "F": 0, "P": -3, "S": -2, "T": -1, "W": -2, "Y": -1, "V": 1, "B": -4, "Z": -3, "X": -1, "*": -4}, 51.16 -"K": {"A": -1, "R": 2, "N": 0, "D": -1, "C": -3, "Q": 1, "E": 1, "G": -2, "H": -1, "I": -3, "L": -2, "K": 5, "M": -1, "F": -3, "P": -1, "S": 0, "T": -1, "W": -3, "Y": -2, "V": -2, "B": 0, "Z": 1, "X": -1, "*": -4}, 51.17 -"M": {"A": -1, "R": -1, "N": -2, "D": -3, "C": -1, "Q": 0, "E": -2, "G": -3, "H": -2, "I": 1, "L": 2, "K": -1, "M": 5, "F": 0, "P": -2, "S": -1, "T": -1, "W": -1, "Y": -1, "V": 1, "B": -3, "Z": -1, "X": -1, "*": -4}, 51.18 -"F": {"A": -2, "R": -3, "N": -3, "D": -3, "C": -2, "Q": -3, "E": -3, "G": -3, "H": -1, "I": 0, "L": 0, "K": -3, "M": 0, "F": 6, "P": -4, "S": -2, "T": -2, "W": 1, "Y": 3, "V": -1, "B": -3, "Z": -3, "X": -1, "*": -4}, 51.19 -"P": {"A": -1, "R": -2, "N": -2, "D": -1, "C": -3, "Q": -1, "E": -1, "G": -2, "H": -2, "I": -3, "L": -3, "K": -1, "M": -2, "F": -4, "P": 7, "S": -1, "T": -1, "W": -4, "Y": -3, "V": -2, "B": -2, "Z": -1, "X": -2, "*": -4}, 51.20 -"S": {"A": 1, "R": -1, "N": 1, "D": 0, "C": -1, "Q": 0, "E": 0, "G": 0, "H": -1, "I": -2, "L": -2, "K": 0, "M": -1, "F": -2, "P": -1, "S": 4, "T": 1, "W": -3, "Y": -2, "V": -2, "B": 0, "Z": 0, "X": 0, "*": -4}, 51.21 -"T": {"A": 0, "R": -1, "N": 0, "D": -1, "C": -1, "Q": -1, "E": -1, "G": -2, "H": -2, "I": -1, "L": -1, "K": -1, "M": -1, "F": -2, "P": -1, "S": 1, "T": 5, "W": -2, "Y": -2, "V": 0, "B": -1, "Z": -1, "X": 0, "*": -4}, 51.22 -"W": {"A": -3, "R": -3, "N": -4, "D": -4, 
"C": -2, "Q": -2, "E": -3, "G": -2, "H": -2, "I": -3, "L": -2, "K": -3, "M": -1, "F": 1, "P": -4, "S": -3, "T": -2, "W": 11, "Y": 2, "V": -3, "B": -4, "Z": -3, "X": -2, "*": -4}, 51.23 -"Y": {"A": -2, "R": -2, "N": -2, "D": -3, "C": -2, "Q": -1, "E": -2, "G": -3, "H": 2, "I": -1, "L": -1, "K": -2, "M": -1, "F": 3, "P": -3, "S": -2, "T": -2, "W": 2, "Y": 7, "V": -1, "B": -3, "Z": -2, "X": -1, "*": -4}, 51.24 -"V": {"A": 0, "R": -3, "N": -3, "D": -3, "C": -1, "Q": -2, "E": -2, "G": -3, "H": -3, "I": 3, "L": 1, "K": -2, "M": 1, "F": -1, "P": -2, "S": -2, "T": 0, "W": -3, "Y": -1, "V": 4, "B": -3, "Z": -2, "X": -1, "*": -4}, 51.25 -"B": {"A": -2, "R": -1, "N": 3, "D": 4, "C": -3, "Q": 0, "E": 1, "G": -1, "H": 0, "I": -3, "L": -4, "K": 0, "M": -3, "F": -3, "P": -2, "S": 0, "T": -1, "W": -4, "Y": -3, "V": -3, "B": 4, "Z": 0, "X": -1, "*": -4}, 51.26 -"Z": {"A": -1, "R": 0, "N": 0, "D": 1, "C": -3, "Q": 3, "E": 4, "G": -2, "H": 0, "I": -3, "L": -3, "K": 1, "M": -1, "F": -3, "P": -1, "S": 0, "T": -1, "W": -3, "Y": -2, "V": -2, "B": 0, "Z": 4, "X": -1, "*": -4}, 51.27 -"X": {"A": 0, "R": -1, "N": -1, "D": -1, "C": -2, "Q": -1, "E": -1, "G": -1, "H": -1, "I": -1, "L": -1, "K": -1, "M": -1, "F": -1, "P": -2, "S": 0, "T": 0, "W": -2, "Y": -1, "V": -1, "B": -1, "Z": -1, "X": -1, "*": -4}, 51.28 -"*": {"A": -4, "R": -4, "N": -4, "D": -4, "C": -4, "Q": -4, "E": -4, "G": -4, "H": -4, "I": -4, "L": -4, "K": -4, "M": -4, "F": -4, "P": -4, "S": -4, "T": -4, "W": -4, "Y": -4, "V": -4, "B": -4, "Z": -4, "X": -4, "*": 1} 51.29 -} 51.30 - 51.31 -gaps = (-8, -4, -2, -1)
52.1 --- a/lib/allpy_pdb.py Mon Jan 24 21:40:10 2011 +0300 52.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 52.3 @@ -1,67 +0,0 @@ 52.4 - 52.5 -import re 52.6 -from Bio.PDB import PDBParser 52.7 - 52.8 -""" Functions to get pdb information from fasta id 52.9 -and to generate fasta id from pdb information 52.10 - 52.11 -pdb information: code, chain, model 52.12 - 52.13 -TODO: same for local pdb files 52.14 -""" 52.15 - 52.16 -# for pdb-codes 52.17 -re1 = re.compile(r"(^|[^a-z0-9])(?P<code>[0-9][0-9a-z]{3})([^a-z0-9](?P<chain>[0-9a-z ]?)(?P<model>[^a-z0-9]([0-9]{1,3}))?)?", re.I) 52.18 - 52.19 -#~ # for files 52.20 -#~ re2 = re.compile(r"(^)([^^]+\.(ent|pdb))([^a-zA-Z0-9]([0-9A-Za-z ]?)([^a-zA-Z0-9]([0-9]{1,3}))?)?$") 52.21 - 52.22 -def std_id(pdb_id, pdb_chain, pdb_model=None): 52.23 - if pdb_model: 52.24 - return "%s_%s_%s" % \ 52.25 - (pdb_id.lower().strip(), pdb_chain.upper().strip(), pdb_model) 52.26 - else: 52.27 - return "%s_%s" % \ 52.28 - (pdb_id.lower().strip(), pdb_chain.upper().strip()) 52.29 - 52.30 -def pdb_id_parse(ID): 52.31 - match = re1.search(ID) 52.32 - if not match: 52.33 - return None 52.34 - d = match.groupdict() 52.35 - if 'chain' not in d or not d['chain']: 52.36 - d['chain'] = ' ' 52.37 - if 'model' not in d or not d['model']: 52.38 - d['model'] = 0 52.39 - return d 52.40 - 52.41 - 52.42 -def get_structure(file, name): 52.43 - return PDBParser().get_structure(name, file) 52.44 - 52.45 -#~ def std_id_parse(ID): 52.46 - #~ """ 52.47 - #~ Parse standart ID to pdb_code, chain and model 52.48 - #~ """ 52.49 - #~ if '.ent' in ID.lower() or '.pdb' in ID.lower(): 52.50 - #~ # it is file 52.51 - #~ parseO = self.re2.search(ID) # files 52.52 - #~ else: 52.53 - #~ parseO = self.re1.search(ID.lower()) # pdb codes 52.54 - #~ if not parseO: 52.55 - #~ return None 52.56 - #~ parse = parseO.groups() 52.57 - #~ if len(parse) < 2: 52.58 - #~ return None 52.59 - #~ code = parse[1] 52.60 - #~ chain = '' 52.61 - #~ model = None 52.62 - #~ if len(parse) >= 4: 
52.63 - #~ chain = parse[3] 52.64 - #~ if chain: 52.65 - #~ chain = chain.upper() 52.66 - #~ if len(parse) >= 6: 52.67 - #~ if parse[5]: 52.68 - #~ model = parse[5] 52.69 - #~ return code, chain, model 52.70 -
53.1 --- a/lib/block.py Mon Jan 24 21:40:10 2011 +0300 53.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 53.3 @@ -1,172 +0,0 @@ 53.4 -#!usr/bin/python 53.5 - 53.6 -import sys 53.7 - 53.8 -import project 53.9 -import sequence 53.10 -import monomer 53.11 -import config 53.12 -from graph import Graph 53.13 -from Bio.PDB import Superimposer 53.14 -from tempfile import NamedTemporaryFile 53.15 -import os 53.16 -from fasta import save_fasta 53.17 - 53.18 -class Block(object): 53.19 - """ Block of alignment 53.20 - 53.21 - Mandatory data: 53.22 - * self.project -- project object, which the block belongs to 53.23 - * self.sequences - set of sequence objects that contain monomers 53.24 - and/or gaps, that constitute the block 53.25 - * self.positions -- sorted list of positions of the project.alignment that 53.26 - are included in the block 53.27 - 53.28 - Don't change self.sequences -- it may be a link to other block.sequences 53.29 - 53.30 - How to create a new block: 53.31 - >>> import project 53.32 - >>> import block 53.33 - >>> proj = project.Project(open("test.fasta")) 53.34 - >>> block1 = block.Block(proj) 53.35 - """ 53.36 - 53.37 - def __init__(self, project, sequences=None, positions=None): 53.38 - """ Builds new block from project 53.39 - 53.40 - if sequences==None, all sequences are used 53.41 - if positions==None, all positions are used 53.42 - """ 53.43 - if sequences == None: 53.44 - sequences = set(project.sequences) # copy 53.45 - if positions == None: 53.46 - positions = range(len(project)) 53.47 - self.project = project 53.48 - self.sequences = sequences 53.49 - self.positions = positions 53.50 - 53.51 - def save_fasta(self, out_file, long_line=70, gap='-'): 53.52 - """ Saves alignment to given file in fasta-format 53.53 - 53.54 - No changes in the names, descriptions or order of the sequences 53.55 - are made. 
53.56 - """ 53.57 - for sequence in self.sequences: 53.58 - alignment_monomers = self.project.alignment[sequence] 53.59 - block_monomers = [alignment_monomers[i] for i in self.positions] 53.60 - string = ''.join([m.type.code1 if m else '-' for m in block_monomers]) 53.61 - save_fasta(out_file, string, sequence.name, sequence.description, long_line) 53.62 - 53.63 - def geometrical_cores(self, max_delta=config.delta, 53.64 - timeout=config.timeout, minsize=config.minsize, 53.65 - ac_new_atoms=config.ac_new_atoms, 53.66 - ac_count=config.ac_count): 53.67 - """ Returns length-sorted list of blocks, representing GCs 53.68 - 53.69 - max_delta -- threshold of distance spreading 53.70 - timeout -- Bron-Kerbosh timeout (then fast O(n ln n) algorithm) 53.71 - minsize -- min size of each core 53.72 - ac_new_atoms -- min part or new atoms in new alternative core 53.73 - current GC is compared with each of already selected GCs 53.74 - if difference is less then ac_new_atoms, current GC is skipped 53.75 - difference = part of new atoms in current core 53.76 - ac_count -- max number of cores (including main core) 53.77 - -1 means infinity 53.78 - If more than one pdb chain for some sequence provided, consider all of them 53.79 - cost is calculated as 1 / (delta + 1) 53.80 - delta in [0, +inf) => cost in (0, 1] 53.81 - """ 53.82 - nodes = self.positions 53.83 - lines = {} 53.84 - for i in self.positions: 53.85 - for j in self.positions: 53.86 - if i < j: 53.87 - distances = [] 53.88 - for sequence in self.sequences: 53.89 - for chain in sequence.pdb_chains: 53.90 - m1 = self.project.alignment[sequence][i] 53.91 - m2 = self.project.alignment[sequence][j] 53.92 - if m1 and m2: 53.93 - r1 = sequence.pdb_residues[chain][m1] 53.94 - r2 = sequence.pdb_residues[chain][m2] 53.95 - ca1 = r1['CA'] 53.96 - ca2 = r2['CA'] 53.97 - d = ca1 - ca2 # Bio.PDB feature 53.98 - distances.append(d) 53.99 - if len(distances) >= 2: 53.100 - delta = max(distances) - min(distances) 53.101 - if delta <= 
max_delta: 53.102 - lines[Graph.line(i, j)] = 1.0 / (1.0 + max_delta) 53.103 - graph = Graph(nodes, lines) 53.104 - cliques = graph.cliques(timeout=timeout, minsize=minsize) 53.105 - GCs = [] 53.106 - for clique in cliques: 53.107 - for GC in GCs: 53.108 - if len(clique - set(GC.positions)) < ac_new_atoms * len(clique): 53.109 - break 53.110 - else: 53.111 - GCs.append(Block(self.project, self.sequences, clique)) 53.112 - if ac_count != -1 and len(GCs) >= ac_count: 53.113 - break 53.114 - return GCs 53.115 - 53.116 - def xstring(self, x='X', gap='-'): 53.117 - """ Returns string consisting of gap chars and chars x at self.positions 53.118 - 53.119 - Length of returning string = length of project 53.120 - """ 53.121 - monomers = [False] * len(self.project) 53.122 - for i in self.positions: 53.123 - monomers[i] = True 53.124 - return ''.join([x if m else gap for m in monomers]) 53.125 - 53.126 - def save_xstring(self, out_file, name, description='', x='X', gap='-', long_line=70): 53.127 - """ Save xstring and name in fasta format """ 53.128 - save_fasta(out_file, self.xstring(x=x, gap=gap), name, description, long_line) 53.129 - 53.130 - def monomers(self, sequence): 53.131 - """ Iterates monomers of this sequence from this block """ 53.132 - alignment_sequence = self.project.alignment[sequence] 53.133 - return (alignment_sequence[i] for i in self.positions) 53.134 - 53.135 - def ca_atoms(self, sequence, pdb_chain): 53.136 - """ Iterates Ca-atom of monomers of this sequence from this block """ 53.137 - return (sequence.pdb_residues[pdb_chain][monomer] for monomer in self.monomers()) 53.138 - 53.139 - def sequences_chains(self): 53.140 - """ Iterates pairs (sequence, chain) """ 53.141 - for sequence in self.project.sequences: 53.142 - if sequence in self.sequences: 53.143 - for chain in sequence.pdb_chains: 53.144 - yield (sequence, chain) 53.145 - 53.146 - def superimpose(self): 53.147 - """ Superimpose all pdb_chains in this block """ 53.148 - sequences_chains = 
list(self.sequences_chains()) 53.149 - if len(sequences_chains) >= 1: 53.150 - sup = Superimposer() 53.151 - fixed_sequence, fixed_chain = sequences_chains.pop() 53.152 - fixed_atoms = self.ca_atoms(fixed_sequence, fixed_chain) 53.153 - for sequence, chain in sequences_chains: 53.154 - moving_atoms = self.ca_atoms(sequence, chain) 53.155 - sup.set_atoms(fixed_atoms, moving_atoms) 53.156 - # Apply rotation/translation to the moving atoms 53.157 - sup.apply(moving_atoms) 53.158 - 53.159 - def pdb_save(self, out_file): 53.160 - """ Save all sequences 53.161 - 53.162 - Returns {(sequence, chain): CHAIN} 53.163 - CHAIN is chain letter in new file 53.164 - """ 53.165 - tmp_file = NamedTemporaryFile(delete=False) 53.166 - tmp_file.close() 53.167 - 53.168 - for sequence, chain in self.sequences_chains(): 53.169 - sequence.pdb_save(tmp_file.name, chain) 53.170 - # TODO: read from tmp_file.name 53.171 - # change CHAIN 53.172 - # add to out_file 53.173 - 53.174 - os.unlink(NamedTemporaryFile) 53.175 -
54.1 --- a/lib/config.py Mon Jan 24 21:40:10 2011 +0300 54.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 54.3 @@ -1,18 +0,0 @@ 54.4 - 54.5 -delta = 2.0 # for geometrical core building 54.6 -minsize = 20 # min size of returning cores 54.7 -maxabsent = 0.15 # deprecated? 54.8 - 54.9 -# pdb download url (XXXX is pdb code place) 54.10 -pdb_url = 'http://www.pdb.org/pdb/files/%s.pdb' 54.11 -pdb_dir = '/tmp/%s.pdb' 54.12 -timeout = 10 # time in sec. for BRON-KERBOSH algorithm 54.13 - 54.14 - 54.15 -# min part or new atoms in new alternative core 54.16 -ac_new_atoms = 0.5 54.17 - 54.18 -# max number of cores (including main core) 54.19 -ac_count = 5 54.20 - 54.21 -
55.1 --- a/lib/fasta.py Mon Jan 24 21:40:10 2011 +0300 55.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 55.3 @@ -1,21 +0,0 @@ 55.4 -def save_fasta(out_file, string, name, description='', long_line=70): 55.5 - """ Saves given string to out_file in fasta_format 55.6 - 55.7 - Splits long lines to substrings of length=long_line 55.8 - To prevent this, set long_line=None 55.9 - """ 55.10 - out_file.write(">%(name)s %(description)s \n" % {'name':name, 'description':description}) 55.11 - if long_line: 55.12 - for i in range(0, len(string) // long_line + 1): 55.13 - out_file.write("%s \n" % string[i*long_line : i*long_line + long_line]) 55.14 - else: 55.15 - out_file.write("%s \n" % string) 55.16 - 55.17 -def determine_long_line(in_file): 55.18 - """ Returns maximum sequence line length in fasta file """ 55.19 - sequences = in_file.read().split('>') 55.20 - for sequence in sequences[1:]: 55.21 - lines = sequence.split('\n')[1:] 55.22 - if len(lines) >= 2: 55.23 - return len(lines[0].strip()) 55.24 - return 70
56.1 --- a/lib/graph.py Mon Jan 24 21:40:10 2011 +0300 56.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 56.3 @@ -1,288 +0,0 @@ 56.4 -# -*- coding: utf-8 -*- 56.5 - 56.6 -from datetime import datetime, timedelta 56.7 -from copy import copy 56.8 - 56.9 -class TimeoutError(Exception): 56.10 - pass 56.11 - 56.12 - 56.13 - 56.14 -class Graph(object): 56.15 - """ Undirected weighted graph 56.16 - 56.17 - Data: 56.18 - nodes -- set of elements 56.19 - lines -- {line: cost}. 56.20 - line is frozenset([e1, e2]) 56.21 - cost is float in (0, 1] or 1 (if all lines are equal) 56.22 - 56.23 - >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1}) 56.24 - >>> g.fast_cliques() 56.25 - Fast algorithm started 56.26 - [frozenset([1, 2]), frozenset([3])] 56.27 - >>> g = Graph(set([1,2,3]), {frozenset([1,2]): 1, frozenset([1,1]): 1}) 56.28 - >>> g.fast_cliques() 56.29 - Fast algorithm started 56.30 - [frozenset([1, 2]), frozenset([3])] 56.31 - >>> g = Graph(set([1,2,3,4]), {frozenset([1,2]): 0.98, frozenset([1,3]): 0.98, 56.32 - ... 
frozenset([2,3]): 0.1, frozenset([1,1]): 1}) 56.33 - >>> g.fast_cliques() 56.34 - Fast algorithm started 56.35 - [frozenset([1, 2, 3]), frozenset([4])] 56.36 - >>> g.bron_kerbosh() 56.37 - Bron and Kerbosh algorithm started 56.38 - [frozenset([1, 2, 3]), frozenset([4])] 56.39 - >>> g.cliques() 56.40 - Bron and Kerbosh algorithm started 56.41 - [frozenset([1, 2, 3])] 56.42 - """ 56.43 - 56.44 - def __init__(self, nodes=None, lines=None): 56.45 - if not nodes: 56.46 - nodes = set() 56.47 - if not lines: 56.48 - lines = dict() 56.49 - self.nodes = set(nodes) # copy 56.50 - self.lines = {} 56.51 - for line, cost in lines.items(): 56.52 - if len(line) == 2 and line.issubset(self.nodes): 56.53 - self.lines[line] = cost 56.54 - 56.55 - @staticmethod 56.56 - def line(k1, k2): 56.57 - """ Construct object, representing line of graph """ 56.58 - return frozenset([k1, k2]) 56.59 - 56.60 - def bounded(self, k1, k2): 56.61 - """ Return if these two nodes of the graph are bounded with line """ 56.62 - return k1 == k2 or Graph.line(k1, k2) in self.lines 56.63 - 56.64 - def count_one(self, node): 56.65 - """ Returns number of connections of this node """ 56.66 - return len([node1 for node1 in self.nodes if self.bounded(node, node1)]) - 1 56.67 - 56.68 - def cost_one(self, node): 56.69 - """ Returns sum of costs of all connections of this node """ 56.70 - return sum([self.lines.get(Graph.line(node, node1), 0) 56.71 - for node1 in self.nodes if node != node1]) 56.72 - 56.73 - def count_all(self): 56.74 - """ Returns {node: number of connections of this node} """ 56.75 - c = dict([(node, 0) for node in self.nodes]) 56.76 - for line in self.lines: 56.77 - for node in line: 56.78 - c[node] += 1 56.79 - return c 56.80 - 56.81 - 56.82 - def drop_node(self, node): 56.83 - """ Remove node and all involved lines """ 56.84 - for node1 in self.nodes: 56.85 - self.lines.pop(Graph.line(node, node1), None) 56.86 - self.nodes.discard(node) 56.87 - 56.88 - def add_node(self, node, parent_graph): 
56.89 - """ Add node and corresponding lines from parent_graph 56.90 - 56.91 - Added lines should be contained in self graph 56.92 - (takes care of hanging lines) 56.93 - """ 56.94 - self.nodes.add(node) 56.95 - for node1 in self.nodes: 56.96 - line = Graph.line(node, node1) 56.97 - if line in parent_graph.lines: 56.98 - self.lines[line] = parent_graph.lines[line] 56.99 - 56.100 - def drop_nodes(self, nodes): 56.101 - """ Run drop_node for each of given nodes 56.102 - 56.103 - Returns if nodes was not empty (ugly beauty) 56.104 - """ 56.105 - for node in nodes: 56.106 - self.drop_node(node) 56.107 - return bool(nodes) 56.108 - 56.109 - def drop_if_count(self, minsize): 56.110 - """ Run drop_node for each node, that has less than minsize lines """ 56.111 - while True: 56.112 - if not self.drop_nodes([node for (node, count) 56.113 - in self.count_all().items() if count < minsize]): 56.114 - break 56.115 - 56.116 - def bron_kerbosh(self, timeout=-1, minsize=1): 56.117 - """ Bron and Kerboch algorithm implementation 56.118 - 56.119 - returns list of cliques 56.120 - clique is frozenset 56.121 - if timeout=-1, it means infinity 56.122 - if timeout has happened, raises TimeoutError 56.123 - 56.124 - lava flow 56.125 - """ 56.126 - print 'Bron and Kerbosh algorithm started' 56.127 - cliques = [] 56.128 - 56.129 - depth = 0 56.130 - list_candidates = [copy(self.nodes)] 56.131 - list_used = [set()] 56.132 - compsub = [] 56.133 - 56.134 - start_time = datetime.now() 56.135 - timeout_timedelta = timedelta(timeout) 56.136 - 56.137 - while True: # ПОКА... 56.138 - if depth == -1: 56.139 - break # ВСЕ! 
Все рекурсии (итерации) пройдены 56.140 - candidates = copy(list_candidates[depth]) 56.141 - used = copy(list_used[depth]) 56.142 - if not candidates: # ПОКА candidates НЕ пусто 56.143 - depth -= 1 56.144 - if compsub: 56.145 - compsub.pop() 56.146 - continue 56.147 - 56.148 - # И used НЕ содержит вершины, СОЕДИНЕННОЙ СО ВСЕМИ вершинами из candidates 56.149 - # (все из used НЕ соединены хотя бы с 1 из candidates) 56.150 - used_candidates = False 56.151 - 56.152 - for used1 in used: 56.153 - for candidates1 in candidates: 56.154 - if not self.bounded(used1, candidates1): 56.155 - break 56.156 - else: 56.157 - used_candidates = True 56.158 - 56.159 - if used_candidates: 56.160 - depth -= 1 56.161 - 56.162 - if compsub: 56.163 - compsub.pop() 56.164 - continue 56.165 - 56.166 - # Выбираем вершину v из candidates и добавляем ее в compsub 56.167 - v = candidates.pop() 56.168 - candidates.add(v) 56.169 - compsub.append(v) 56.170 - # Формируем new_candidates и new_used, удаляя из candidates и used вершины, НЕ соединенные с v 56.171 - # (то есть, оставляя только соединенные с v) 56.172 - new_candidates = set() 56.173 - for candidates1 in candidates: 56.174 - if self.bounded(candidates1, v) and candidates1 != v: 56.175 - new_candidates.add(candidates1) 56.176 - 56.177 - new_used = set() 56.178 - for used1 in used: 56.179 - if self.bounded(used1, v) and used1 != v: 56.180 - new_used.add(used1) 56.181 - 56.182 - # Удаляем v из candidates и помещаем в used 56.183 - list_candidates[depth].remove(v) 56.184 - list_used[depth].add(v) 56.185 - # ЕСЛИ new_candidates и new_used пусты 56.186 - if not new_candidates and not new_used: 56.187 - # compsub ? 
клика 56.188 - if len(compsub) >= minsize: 56.189 - cliques.append(frozenset(compsub)) 56.190 - else: 56.191 - # ИНАЧЕ рекурсивно вызываем bron_kerbosh(new_candidates, new_used) 56.192 - depth += 1 56.193 - 56.194 - # TIMEOUT check start 56.195 - if timeout != -1: 56.196 - if datetime.now() - start_time > timeout_timedelta: 56.197 - raise TimeoutError 56.198 - # TIMEOUT check end 56.199 - 56.200 - if depth >= len(list_candidates): 56.201 - list_candidates.append(set()) 56.202 - list_used.append(set()) 56.203 - 56.204 - list_candidates[depth] = copy(new_candidates) 56.205 - list_used[depth] = copy(new_used) 56.206 - 56.207 - continue 56.208 - 56.209 - # Удаляем v из compsub 56.210 - if compsub: 56.211 - compsub.pop() 56.212 - 56.213 - return cliques 56.214 - 56.215 - 56.216 - def fast_cliques(self, minsize=1): 56.217 - """ returns list of cliques 56.218 - 56.219 - clique is frozenset 56.220 - """ 56.221 - print 'Fast algorithm started' 56.222 - cliques = [] 56.223 - 56.224 - while True: 56.225 - graph = Graph(self.nodes, self.lines) 56.226 - for clique in cliques: 56.227 - graph.drop_nodes(clique) 56.228 - if not graph.nodes: 56.229 - break 56.230 - 56.231 - while True: 56.232 - # drop nodes, while its is possible 56.233 - if len(graph.nodes) == 1: 56.234 - break 56.235 - c = graph.count_all() 56.236 - min_count = min(c.values()) 56.237 - bad_nodes = [node for (node, count) in c.items() if count == min_count] 56.238 - if len(bad_nodes) == len(graph.nodes) and min_count != 0: 56.239 - break 56.240 - 56.241 - costs = dict([(node, graph.cost_one(node)) for node in bad_nodes]) 56.242 - min_cost = min(costs.values()) 56.243 - for node, cost in costs.items(): 56.244 - if cost == min_cost: 56.245 - graph.drop_node(node) 56.246 - break 56.247 - 56.248 - while True: 56.249 - # add nodes, while its is possible 56.250 - candidats = {} 56.251 - for node in self.nodes: 56.252 - c = len([i for i in graph.nodes if self.bounded(node, i)]) 56.253 - if c == len(self.nodes): 56.254 - 
graph1 = Graph(graph.nodes, graph.lines) 56.255 - graph1.add_node(node, self) 56.256 - candidats[node] = graph1.cost_one(node) 56.257 - if not candidats: 56.258 - break 56.259 - 56.260 - max_cost = max(candidats.values()) 56.261 - node = [node for (node, cost) in candidats.items() if cost == max_cost][0] 56.262 - graph.add_node(node, self) 56.263 - 56.264 - cliques.append(frozenset(graph.nodes)) 56.265 - 56.266 - return cliques 56.267 - 56.268 - 56.269 - def cliques(self, timeout=-1, minsize=1): 56.270 - """ returns length-sorted list of cliques 56.271 - 56.272 - clique is frozenset 56.273 - 56.274 - can change self! 56.275 - 56.276 - try to execute bron_kerbosh 56.277 - if it raises TimeoutError, executes fast_cliques 56.278 - """ 56.279 - 56.280 - self.drop_if_count(minsize) 56.281 - 56.282 - try: 56.283 - cliques = self.bron_kerbosh(timeout, minsize) 56.284 - cliques.sort(key=lambda clique: len(clique), reverse=True) 56.285 - except TimeoutError: 56.286 - cliques = self.fast_cliques(minsize) 56.287 - return cliques 56.288 - 56.289 -if __name__ == "__main__": 56.290 - import doctest 56.291 - doctest.testmod()
57.1 --- a/lib/monomer.py Mon Jan 24 21:40:10 2011 +0300 57.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 57.3 @@ -1,88 +0,0 @@ 57.4 -#!/usr/bin/python 57.5 - 57.6 -from allpy_data.AAdict import AAdict 57.7 -import Bio.PDB 57.8 - 57.9 -index_code3 = {} 57.10 -index_code1_protein = {} 57.11 -index_name = {} 57.12 - 57.13 - 57.14 -class MonomerType(object): 57.15 - """ Monomer type 57.16 - 57.17 - name -- string like "Valine" 57.18 - code1 -- one-letter code (in upper case) 57.19 - code3 -- three-letter code (in upper case) 57.20 - is_modified -- True of False 57.21 - """ 57.22 - def __init__(self, name, code1, code3, is_modified=False): 57.23 - self.name = name.capitalize() 57.24 - self.code1 = code1.upper() 57.25 - self.code3 = code3.upper() 57.26 - self.is_modified = bool(is_modified) # ugly 57.27 - 57.28 - index_name[self.name] = self 57.29 - index_code3[self.code3] = self 57.30 - 57.31 - @staticmethod 57.32 - def from_code3(code3): 57.33 - return index_code3[code3.upper()] 57.34 - @staticmethod 57.35 - def from_name(name): 57.36 - return index_name[name.capitalize()] 57.37 - 57.38 - @staticmethod 57.39 - def from_pdb_residue(pdb_residue): 57.40 - return MonomerType.from_code3(pdb_residue.get_resname()) 57.41 - 57.42 - # TO DISCUSS 57.43 - def __eq__(self, other): 57.44 - return self.code1 == other.code1 57.45 - 57.46 - def __ne__(self, other): 57.47 - return not (self == other) 57.48 - 57.49 - 57.50 -class Monomer(object): 57.51 - """ Monomer 57.52 - 57.53 - type -- link to MonomerType object 57.54 - """ 57.55 - def __init__(self, monomer_type): 57.56 - self.type = monomer_type 57.57 - 57.58 - def __eq__(self, other): 57.59 - return self.type == other.type 57.60 - 57.61 - def __ne__(self, other): 57.62 - return not (self == other) 57.63 - 57.64 -class AminoAcidType(MonomerType): 57.65 - def __init__(self, name, code1, code3, is_modified=False): 57.66 - MonomerType.__init__(self, name, code1, code3, is_modified) 57.67 - if not is_modified: 57.68 - 
index_code1_protein[self.code1] = self 57.69 - 57.70 - @staticmethod 57.71 - def from_code1(code1): 57.72 - return index_code1_protein[code1.upper()] 57.73 - def instance(self): 57.74 - """ Returns new AminoAcid object of this type """ 57.75 - return AminoAcid(self) 57.76 - 57.77 - 57.78 -class AminoAcid(Monomer): 57.79 - """ Amino acid """ 57.80 - pass 57.81 - 57.82 - 57.83 -# prepare all aminoacids 57.84 - 57.85 -for code3, data in AAdict.items(): 57.86 - code1, m_type, is_modified, none, name = data 57.87 - if m_type == 'p': 57.88 - AminoAcidType(name, code1, code3, is_modified) 57.89 - 57.90 -del code3, data, code1, m_type, is_modified, none, name 57.91 -
58.1 --- a/lib/project.py Mon Jan 24 21:40:10 2011 +0300 58.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 58.3 @@ -1,255 +0,0 @@ 58.4 -#!/usr/bin/python 58.5 - 58.6 -""" 58.7 - "I will not use abbrev." 58.8 - "I will always finish what I st" 58.9 - - Bart Simpson 58.10 - 58.11 -""" 58.12 - 58.13 -import sequence 58.14 -Sequence = sequence.Sequence 58.15 -from monomer import AminoAcidType 58.16 -import allpy_data 58.17 -from tempfile import NamedTemporaryFile 58.18 -import os 58.19 -import block 58.20 -from fasta import save_fasta 58.21 - 58.22 -Block = block.Block 58.23 - 58.24 -class Project(object): 58.25 - """ Alignment representing class 58.26 - 58.27 - Mandatory data: 58.28 - * sequences -- list of Sequence objects. Sequences don't contain gaps 58.29 - - see sequence.py module 58.30 - * alignment -- dict 58.31 - {<Sequence object>:[<Monomer object>,None,<Monomer object>]} 58.32 - keys are the Sequence objects, values are the lists, which 58.33 - contain monomers of those sequences or None for gaps in the 58.34 - corresponding sequence of 58.35 - alignment 58.36 - 58.37 - """ 58.38 - def __init__(self, *args): 58.39 - """overloaded constructor 58.40 - 58.41 - Project() -> new empty Project 58.42 - Project(sequences, alignment) -> new Project with sequences and 58.43 - alignment initialized from arguments 58.44 - Project(fasta_file) -> new Project, read alignment and sequences 58.45 - from fasta file 58.46 - 58.47 - """ 58.48 - if len(args)>1:#overloaded constructor 58.49 - self.sequences=args[0] 58.50 - self.alignment=args[1] 58.51 - elif len(args)==0: 58.52 - self.sequences=[] 58.53 - self.alignment={} 58.54 - else: 58.55 - self.sequences,self.alignment=Project.from_fasta(args[0]) 58.56 - 58.57 - def __len__(self): 58.58 - """ Returns width, ie length of each sequence with gaps """ 58.59 - return max([len(line) for line in self.alignment.values()]) 58.60 - 58.61 - def thickness(self): 58.62 - """ The number of sequences in alignment (it's thickness). 
""" 58.63 - return len(self.alignment) 58.64 - 58.65 - def calc_identity(self): 58.66 - """ Calculate the identity of alignment positions for colouring. 58.67 - 58.68 - For every (row, column) in alignment the percentage of the exactly 58.69 - same residue in the same column in the alignment is calculated. 58.70 - The data structure is just like the Project.alignment, but istead of 58.71 - monomers it contains float percentages. 58.72 - """ 58.73 - # Oh, God, that's awful! Absolutely not understandable. 58.74 - # First, calculate percentages of amino acids in every column 58.75 - contribution = 1.0 / len(self.sequences) 58.76 - all_columns = [] 58.77 - for position in range(len(self)): 58.78 - column_percentage = {} 58.79 - for seq in self.alignment: 58.80 - if self.alignment[seq][position] is not None: 58.81 - aa = self.alignment[seq][position].code 58.82 - else: 58.83 - aa = None 58.84 - if aa in allpy_data.amino_acids: 58.85 - if aa in column_percentage.keys(): 58.86 - column_percentage[aa] += contribution 58.87 - else: 58.88 - column_percentage[aa] = contribution 58.89 - all_columns.append(column_percentage) 58.90 - # Second, map these percentages onto the alignment 58.91 - self.identity_percentages = {} 58.92 - for seq in self.sequences: 58.93 - self.identity_percentages[seq] = [] 58.94 - for seq in self.identity_percentages: 58.95 - line = self.identity_percentages[seq] 58.96 - for position in range(len(self)): 58.97 - if self.alignment[seq][position] is not None: 58.98 - aa = self.alignment[seq][position].code 58.99 - else: 58.100 - aa = None 58.101 - line.append(all_columns[position].get(aa)) 58.102 - return self.identity_percentages 58.103 - 58.104 - @staticmethod 58.105 - def from_fasta(file, monomer_kind=AminoAcidType): 58.106 - """ Import data from fasta file 58.107 - 58.108 - monomer_kind is class, inherited from MonomerType 58.109 - 58.110 - >>> import project 58.111 - >>> sequences,alignment=project.Project.from_fasta(open("test.fasta")) 58.112 - """ 
58.113 - import re 58.114 - 58.115 - sequences = [] 58.116 - alignment = {} 58.117 - 58.118 - raw_sequences = file.read().split(">") 58.119 - if len(raw_sequences) <= 1: 58.120 - raise Exception("Wrong format of fasta-file %s" % file.name) 58.121 - 58.122 - raw_sequences = raw_sequences[1:] #ignore everything before the first > 58.123 - for raw in raw_sequences: 58.124 - parsed_raw_sequence = raw.split("\n") 58.125 - parsed_raw_sequence = [s.strip() for s in parsed_raw_sequence] 58.126 - name_and_description = parsed_raw_sequence[0] 58.127 - name_and_description = name_and_description.split(" ",1) 58.128 - if len(name_and_description) == 2: 58.129 - name, description = name_and_description 58.130 - elif len(name_and_description) == 1: 58.131 - #if there is description 58.132 - name = name_and_description[0] 58.133 - description = '' 58.134 - else: 58.135 - raise Exception("Wrong name of sequence %(name)$ fasta-file %(file)s" % \ 58.136 - {'name': name, 'file': file.name}) 58.137 - 58.138 - if len(parsed_raw_sequence) <= 1: 58.139 - raise Exception("Wrong format of sequence %(name)$ fasta-file %(file)s" % \ 58.140 - {'name': name, 'file': file.name}) 58.141 - string = "" 58.142 - for piece in parsed_raw_sequence[1:]: 58.143 - piece_without_whitespace_chars = re.sub("\s", "", piece) 58.144 - string += piece_without_whitespace_chars 58.145 - monomers = [] #convert into Monomer objects 58.146 - alignment_list = [] #create the respective list in alignment dict 58.147 - for current_monomer in string: 58.148 - if current_monomer not in ["-", ".", "~"]: 58.149 - monomers.append(monomer_kind.from_code1(current_monomer).instance()) 58.150 - alignment_list.append(monomers[-1]) 58.151 - else: 58.152 - alignment_list.append(None) 58.153 - sequence = Sequence(monomers, name, description) 58.154 - sequences.append(sequence) 58.155 - alignment[sequence] = alignment_list 58.156 - return sequences, alignment 58.157 - 58.158 - 58.159 - @staticmethod 58.160 - def 
from_sequences(*sequences): 58.161 - """ Constructs new alignment from sequences 58.162 - 58.163 - Add None's to right end to make equal lengthes of alignment sequences 58.164 - """ 58.165 - project = Project() 58.166 - project.sequences = sequences 58.167 - max_length = max(len(sequence) for sequence in sequences) 58.168 - for sequence in sequences: 58.169 - gaps_count = max_length - len(sequence) 58.170 - project.alignment[sequence] = sequence.monomers + [None] * gaps_count 58.171 - return project 58.172 - 58.173 - def save_fasta(self, out_file, long_line=70, gap='-'): 58.174 - """ Saves alignment to given file 58.175 - 58.176 - Splits long lines to substrings of length=long_line 58.177 - To prevent this, set long_line=None 58.178 - """ 58.179 - Block(self).save_fasta(out_file, long_line=long_line, gap=gap) 58.180 - 58.181 - def muscle_align(self): 58.182 - """ Simple align ths alignment using sequences (muscle) 58.183 - 58.184 - uses old Monomers and Sequences objects 58.185 - """ 58.186 - tmp_file = NamedTemporaryFile(delete=False) 58.187 - self.save_fasta(tmp_file) 58.188 - tmp_file.close() 58.189 - os.system("muscle -in %(tmp)s -out %(tmp)s" % {'tmp': tmp_file.name}) 58.190 - sequences, alignment = Project.from_fasta(open(tmp_file.name)) 58.191 - for sequence in self.sequences: 58.192 - try: 58.193 - new_sequence = [i for i in sequences if sequence==i][0] 58.194 - except: 58.195 - raise Exception("Align: Cann't find sequence %s in muscle output" % \ 58.196 - sequence.name) 58.197 - old_monomers = iter(sequence.monomers) 58.198 - self.alignment[sequence] = [] 58.199 - for monomer in alignment[new_sequence]: 58.200 - if not monomer: 58.201 - self.alignment[sequence].append(monomer) 58.202 - else: 58.203 - old_monomer = old_monomers.next() 58.204 - if monomer != old_monomer: 58.205 - raise Exception("Align: alignment errors") 58.206 - self.alignment[sequence].append(old_monomer) 58.207 - os.unlink(tmp_file.name) 58.208 - 58.209 - 58.210 - def column(self, 
sequence=None, sequences=None, original=None): 58.211 - """ returns list of columns of alignment 58.212 - 58.213 - sequence or sequences: 58.214 - if sequence is given, then column is (original_monomer, monomer) 58.215 - if sequences is given, then column is (original_monomer, {sequence: monomer}) 58.216 - if both of them are given, it is an error 58.217 - original (Sequence type): 58.218 - if given, this filters only columns represented by original sequence 58.219 - """ 58.220 - if sequence and sequences: 58.221 - raise Exception("Wrong usage. read help") 58.222 - indexes = dict([(v, k) for( k, v) in enumerate(self.sequences)]) 58.223 - alignment = self.alignment.items() 58.224 - alignment.sort(key=lambda i: indexes[i[0]]) 58.225 - alignment = [monomers for seq, monomers in alignment] 58.226 - for column in zip(*alignment): 58.227 - if not original or column[indexes[original]]: 58.228 - if sequence: 58.229 - yield (column[indexes[original]], column[indexes[sequence]]) 58.230 - else: 58.231 - yield (column[indexes[original]], 58.232 - dict([(s, column[indexes[s]]) for s in sequences])) 58.233 - 58.234 - def pdb_auto_add(self, conformity_file=None): 58.235 - """ Adds pdb information to each sequence 58.236 - 58.237 - TODO: conformity_file 58.238 - """ 58.239 - conformity = {} 58.240 - 58.241 - for sequence in self.sequences: 58.242 - try: 58.243 - sequence.pdb_auto_add(conformity.get(sequence.name, None)) 58.244 - except Exception, t: 58.245 - print "Cann't add pdb information about chain %s:" % sequence.name 58.246 - print t 58.247 - 58.248 - def secstr(self, secuence, pdb_chain, gap='-'): 58.249 - """ Returns string representing secondary structure """ 58.250 - return ''.join([ 58.251 - (secuence.pdb_secstr[pdb_chain][m] if secuence.secstr_has(pdb_chain, m) else gap) 58.252 - for m in self.alignment[secuence]]) 58.253 - 58.254 - def save_secstr(self, out_file, secuence, pdb_chain, 58.255 - name, description='', gap='-', long_line=70): 58.256 - """ Save secondary 
structure and name in fasta format """ 58.257 - save_fasta(out_file, self.secstr(secuence, pdb_chain, gap), name, description, long_line) 58.258 -
59.1 --- a/lib/sequence.py Mon Jan 24 21:40:10 2011 +0300 59.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 59.3 @@ -1,199 +0,0 @@ 59.4 -#!/usr/bin/python 59.5 -# -*- coding: utf-8 -*- 59.6 - 59.7 -from monomer import AminoAcidType 59.8 -from Bio.PDB import CaPPBuilder, PDBIO 59.9 -from Bio.PDB.DSSP import make_dssp_dict 59.10 -from allpy_pdb import std_id, pdb_id_parse, get_structure 59.11 -import project 59.12 -import sys 59.13 -import config 59.14 -import os.path 59.15 -import urllib2 59.16 -from tempfile import NamedTemporaryFile 59.17 -import os 59.18 - 59.19 - 59.20 -class Sequence(object): 59.21 - """ Sequence of Monomers 59.22 - 59.23 - Mandatory data: 59.24 - * name -- str with the name of sequence 59.25 - * description -- str with description of the sequence 59.26 - * monomers -- list of monomer objects (aminoacids or nucleotides) 59.27 - * pdb_chains -- list of Bio.PDB.Chain's 59.28 - * pdb_files -- dictionary like {Bio.PDB.Chain: file_obj} 59.29 - 59.30 - * pdb_residues -- dictionary like {Bio.PDB.Chain: {Monomer: Bio.PDB.Residue}} 59.31 - * pdb_secstr -- dictionary like {Bio.PDB.Chain: {Monomer: 'Secondary structure'}} 59.32 - Code Secondary structure 59.33 - H ?-helix 59.34 - B Isolated ?-bridge residue 59.35 - E Strand 59.36 - G 3-10 helix 59.37 - I ?-helix 59.38 - T Turn 59.39 - S Bend 59.40 - - Other 59.41 - 59.42 - 59.43 - ?TODO: global pdb_structures 59.44 - """ 59.45 - def __init__(self, monomers=None, name='', description=""): 59.46 - if not monomers: 59.47 - monomers = [] 59.48 - self.name = name 59.49 - self.description = description 59.50 - self.monomers = monomers 59.51 - self.pdb_chains = [] 59.52 - self.pdb_files = {} 59.53 - self.pdb_residues = {} 59.54 - self.pdb_secstr = {} 59.55 - 59.56 - def __len__(self): 59.57 - return len(self.monomers) 59.58 - 59.59 - def __str__(self): 59.60 - """ Returns sequence in one-letter code """ 59.61 - return ''.join([monomer.type.code1 for monomer in self.monomers]) 59.62 - 59.63 - def __eq__(self, 
other): 59.64 - """ Returns if all corresponding monomers of this sequences are equal 59.65 - 59.66 - If lengths of sequences are not equal, returns False 59.67 - """ 59.68 - return len(self) == len(other) and \ 59.69 - all([a==b for a, b in zip(self.monomers, other.monomers)]) 59.70 - 59.71 - def __ne__(self, other): 59.72 - return not (self == other) 59.73 - 59.74 - def pdb_chain_add(self, pdb_file, pdb_id, pdb_chain, pdb_model=0): 59.75 - """ Reads Pdb chain from file 59.76 - 59.77 - and align each Monomer with PDB.Residue (TODO) 59.78 - """ 59.79 - name = std_id(pdb_id, pdb_chain, pdb_model) 59.80 - structure = get_structure(pdb_file, name) 59.81 - try: 59.82 - chain = structure[pdb_model][pdb_chain] 59.83 - except: 59.84 - chain = list(list(structure)[0])[0] 59.85 - self.pdb_chains.append(chain) 59.86 - self.pdb_residues[chain] = {} 59.87 - self.pdb_secstr[chain] = {} 59.88 - pdb_sequence = Sequence.from_pdb_chain(chain) 59.89 - alignment = project.Project.from_sequences(self, pdb_sequence) 59.90 - alignment.muscle_align() 59.91 - for monomer, pdb_monomer in alignment.column(sequence=pdb_sequence, original=self): 59.92 - if pdb_sequence.pdb_has(chain, pdb_monomer): 59.93 - residue = pdb_sequence.pdb_residues[chain][pdb_monomer] 59.94 - self.pdb_residues[chain][monomer] = residue 59.95 - self.pdb_files[chain] = pdb_file 59.96 - 59.97 - def pdb_unload(self): 59.98 - """ Delete all pdb-connected links """ 59.99 - #~ gc.get_referrers(self.pdb_chains[0]) 59.100 - self.pdb_chains = [] 59.101 - self.pdb_residues = {} 59.102 - self.pdb_secstr = {} # FIXME 59.103 - self.pdb_files = {} # FIXME 59.104 - 59.105 - @staticmethod 59.106 - def from_str(fasta_str, name='', description='', monomer_kind=AminoAcidType): 59.107 - """ Import data from one-letter code 59.108 - 59.109 - monomer_kind is class, inherited from MonomerType 59.110 - """ 59.111 - monomers = [monomer_kind.from_code1(aa).instance() for aa in fasta_str] 59.112 - return Sequence(monomers, name, description) 
59.113 - 59.114 - @staticmethod 59.115 - def from_pdb_chain(chain): 59.116 - """ Returns Sequence with Monomers with link to Bio.PDB.Residue 59.117 - 59.118 - chain is Bio.PDB.Chain 59.119 - """ 59.120 - cappbuilder = CaPPBuilder() 59.121 - peptides = cappbuilder.build_peptides(chain) 59.122 - sequence = Sequence() 59.123 - sequence.pdb_chains = [chain] 59.124 - sequence.pdb_residues[chain] = {} 59.125 - sequence.pdb_secstr[chain] = {} 59.126 - for peptide in peptides: 59.127 - for ca_atom in peptide.get_ca_list(): 59.128 - residue = ca_atom.get_parent() 59.129 - try: 59.130 - monomer = AminoAcidType.from_pdb_residue(residue).instance() 59.131 - sequence.pdb_residues[chain][monomer] = residue 59.132 - sequence.monomers.append(monomer) 59.133 - except: 59.134 - print "Warning: unknown monomer in PDB: %s" % residue 59.135 - return sequence 59.136 - 59.137 - def pdb_auto_add(self, conformity_info=None, pdb_directory='./tmp'): 59.138 - """ Adds pdb information to each monomer 59.139 - 59.140 - Returns if information has been successfully added 59.141 - TODO: conformity_file 59.142 - 59.143 - id-format lava flow 59.144 - """ 59.145 - if not conformity_info: 59.146 - path = os.path.join(pdb_directory, self.name) 59.147 - if os.path.exists(path) and os.path.getsize(path): 59.148 - match = pdb_id_parse(self.name) 59.149 - self.pdb_chain_add(open(path), match['code'], 59.150 - match['chain'], match['model']) 59.151 - else: 59.152 - match = pdb_id_parse(self.name) 59.153 - if match: 59.154 - code = match['code'] 59.155 - pdb_filename = config.pdb_dir % code 59.156 - if not os.path.exists(pdb_filename) or not os.path.getsize(pdb_filename): 59.157 - url = config.pdb_url % code 59.158 - print "Download %s" % url 59.159 - pdb_file = open(pdb_filename, 'w') 59.160 - data = urllib2.urlopen(url).read() 59.161 - pdb_file.write(data) 59.162 - pdb_file.close() 59.163 - print "Save %s" % pdb_filename 59.164 - pdb_file = open(pdb_filename) 59.165 - self.pdb_chain_add(pdb_file, code, 
match['chain'], match['model']) 59.166 - 59.167 - def pdb_save(self, out_filename, pdb_chain): 59.168 - """ Saves pdb_chain to out_file """ 59.169 - class GlySelect(Select): 59.170 - def accept_chain(self, chain): 59.171 - if chain == pdb_chain: 59.172 - return 1 59.173 - else: 59.174 - return 0 59.175 - io = PDBIO() 59.176 - structure = chain.get_parent() 59.177 - io.set_structure(structure) 59.178 - io.save(out_filename, GlySelect()) 59.179 - 59.180 - 59.181 - def pdb_add_sec_str(self, pdb_chain): 59.182 - """ Add secondary structure data """ 59.183 - tmp_file = NamedTemporaryFile(delete=False) 59.184 - tmp_file.close() 59.185 - pdb_file = self.pdb_files[pdb_chain].name 59.186 - os.system("dsspcmbi %(pdb)s %(tmp)s" % {'pdb': pdb_file, 'tmp': tmp_file.name}) 59.187 - dssp, keys = make_dssp_dict(tmp_file.name) 59.188 - for monomer in self.monomers: 59.189 - if self.pdb_has(pdb_chain, monomer): 59.190 - residue = self.pdb_residues[pdb_chain][monomer] 59.191 - try: 59.192 - d = dssp[(pdb_chain.get_id(), residue.get_id())] 59.193 - self.pdb_secstr[pdb_chain][monomer] = d[1] 59.194 - except: 59.195 - print "No dssp information about %s at %s" % (monomer, pdb_chain) 59.196 - os.unlink(tmp_file.name) 59.197 - 59.198 - def pdb_has(self, chain, monomer): 59.199 - return chain in self.pdb_residues and monomer in self.pdb_residues[chain] 59.200 - 59.201 - def secstr_has(self, chain, monomer): 59.202 - return chain in self.pdb_secstr and monomer in self.pdb_secstr[chain]
60.1 --- a/pytale/dummy_pytale.py Mon Jan 24 21:40:10 2011 +0300 60.2 +++ b/pytale/dummy_pytale.py Tue Jan 25 16:03:00 2011 +0300 60.3 @@ -11,7 +11,7 @@ 60.4 1) MenuBar 60.5 2) Resizable panel with ListBox containing sequence names 60.6 3) Panel with RichTextBox containing the alignment itself 60.7 - 4) StatusBar 60.8 + 4) StatusBar 60.9 """ 60.10 def __init__(self, parent, title, size=(1000, 600)): 60.11 global settings 60.12 @@ -140,7 +140,7 @@ 60.13 end = self.text.XYToPosition(column+1, line) 60.14 self.text.SetStyle(start, end, self.settings.conservation_styles[conser]) 60.15 print 'colouring done' 60.16 - 60.17 + 60.18 60.19 class Settings(object): 60.20 def __init__(self):
61.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 61.2 +++ b/repeats/config.py Tue Jan 25 16:03:00 2011 +0300 61.3 @@ -0,0 +1,1 @@ 61.4 +min_intersection = 15
62.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 62.2 +++ b/repeats/repeat_joiner.py Tue Jan 25 16:03:00 2011 +0300 62.3 @@ -0,0 +1,262 @@ 62.4 +""" Lib to join pair repeats 62.5 + 62.6 +Throuot: [from, to) 62.7 +from < to 62.8 +ori: True for '+'-strand, - for '-'-strand 62.9 +""" 62.10 + 62.11 +import Queue 62.12 + 62.13 +from bx.intervals.intersection import Intersecter 62.14 + 62.15 +import config 62.16 + 62.17 +class Chromosome(Intersecter): 62.18 + """ non-cycled, linear chromosome 62.19 + 62.20 + Data: 62.21 + name 62.22 + """ 62.23 + def __init__(self, name): 62.24 + self.name = name 62.25 + 62.26 + def __repr__(self): 62.27 + return self.name 62.28 + 62.29 + __str__ = __repr__ 62.30 + 62.31 +class Interval(object): 62.32 + """ individual interval 62.33 + 62.34 + start 62.35 + end 62.36 + 62.37 + chromosome 62.38 + ori 62.39 + sib === thick_edge -- paired interval 62.40 + 62.41 + in interval_group (installes in init): 62.42 + group_real_start -- position of real_start() of this interval in group 62.43 + group_ori -- if (position of real_start() of this interval in the group) 62.44 + < (position of real_end()) 62.45 + """ 62.46 + 62.47 + def __init__(self, repeat_joiner, chromosome_name, pos_from, pos_to, ori=True): 62.48 + repeat_joiner._intervals.append(self) 62.49 + self.start = pos_from 62.50 + self.end = pos_to 62.51 + self.ori = ori 62.52 + self.group_real_start = None # for repr 62.53 + self.group_ori = None 62.54 + if chromosome_name not in repeat_joiner: 62.55 + repeat_joiner[chromosome_name] = Chromosome(chromosome_name) 62.56 + self.chromosome = repeat_joiner[chromosome_name] 62.57 + repeat_joiner[chromosome_name].insert_interval(self) 62.58 + 62.59 + @staticmethod 62.60 + def pair(interval_1, interval_2): 62.61 + """ connect these interval with thick edge """ 62.62 + interval_1.sib = interval_2 62.63 + interval_2.sib = interval_1 62.64 + 62.65 + def __eq__(self, other): 62.66 + return self.chromosome == other.chromosome and \ 62.67 + self.start 
== other.start and \ 62.68 + self.end == other.end and \ 62.69 + self.ori == other.ori 62.70 + 62.71 + def __ne__(self, other): 62.72 + return not (self == other) 62.73 + 62.74 + def tuple(self): 62.75 + return (self.start, self.end, self.ori, self.chromosome) 62.76 + 62.77 + def thin_edges(self, min_intersection=config.min_intersection): 62.78 + """ return all thin edges (intersected intervals) """ 62.79 + min_intersection -= 1 # bx intersecter already means intersection of length 1 62.80 + if len(self) < min_intersection: 62.81 + return [] 62.82 + pos_start = self.start + min_intersection 62.83 + pos_end = self.end - min_intersection 62.84 + intersected = [] 62.85 + if pos_end <= pos_start: 62.86 + pos_end, pos_start = pos_start, pos_end 62.87 + # fragments intersecting both pos_end and pos_start, whole [pos_start-1, pos_end+1) 62.88 + left = self.chromosome.find(pos_start-1, pos_start) 62.89 + right = self.chromosome.find(pos_end, pos_end+1) 62.90 + intersected = set(left) & set(right) 62.91 + else: 62.92 + intersected = self.chromosome.find(pos_start, pos_end) 62.93 + return filter(lambda r: r != self, intersected) 62.94 + 62.95 + def edges(self, min_intersection=config.min_intersection): 62.96 + """ visit thick edge at first and then all thin edges """ 62.97 + return [self.sib] + self.thin_edges(min_intersection) 62.98 + 62.99 + @property 62.100 + def real_start(self): 62.101 + """ real start of interval (depends on ori) """ 62.102 + if self.ori: 62.103 + return self.start 62.104 + else: 62.105 + return self.end - 1 62.106 + 62.107 + @property 62.108 + def real_end(self): 62.109 + """ real end of interval (depends on ori) """ 62.110 + if self.ori: 62.111 + return self.end 62.112 + else: 62.113 + return self.start - 1 62.114 + 62.115 + def __len__(self): 62.116 + """ length of interval """ 62.117 + return self.end - self.start 62.118 + 62.119 + @property 62.120 + def group_real_end(self): 62.121 + """ reletive end of interval """ 62.122 + if self.group_ori: 
62.123 + return self.group_real_start + len(self) 62.124 + else: 62.125 + return self.group_real_start - len(self) 62.126 + 62.127 + @property 62.128 + def group_start(self): 62.129 + """ min of interval positions in group """ 62.130 + if self.group_ori: 62.131 + return self.group_real_start 62.132 + else: 62.133 + return self.group_real_end + 1 62.134 + 62.135 + @property 62.136 + def group_end(self): 62.137 + """ max of interval positions in group """ 62.138 + if self.group_ori: 62.139 + return self.group_real_end 62.140 + else: 62.141 + return self.group_real_start + 1 62.142 + 62.143 + def inherit_from(self, other): 62.144 + """ Use other as source interval to set group_ori and group_real_start of this """ 62.145 + if other == self.sib: 62.146 + self.group_real_start = other.group_real_start 62.147 + self.group_ori = other.group_ori 62.148 + else: 62.149 + chromosome_ori = other.ori == other.group_ori 62.150 + self.group_ori = self.ori == chromosome_ori 62.151 + delta = self.real_start - other.real_start 62.152 + if chromosome_ori: 62.153 + self.group_real_start = other.group_real_start + delta 62.154 + else: 62.155 + self.group_real_start = other.group_real_start - delta 62.156 + 62.157 + def __repr__(self): 62.158 + if self.group_real_start != None: 62.159 + return "%s: [%i(%i), %i(%i))" % (self.chromosome, self.real_start, 62.160 + self.group_real_start, self.real_end, self.group_real_end) 62.161 + else: 62.162 + return "%s: [%i, %i)" % (self.chromosome, self.real_start, self.real_end) 62.163 + 62.164 + def __str__(self): 62.165 + """ returns string to put into table file. 
IMPORTANT: [start, end] """ 62.166 + ori = 1 if self.ori else -1 62.167 + if self.group_real_start != None: 62.168 + group_ori = 1 if self.group_ori else -1 62.169 + return "%s %i %i %i %i %i %i" % (self.chromosome, self.start, 62.170 + self.end-1, self.group_start, self.group_end-1, ori, group_ori) 62.171 + else: 62.172 + return "%s %i %i %i" % (self.chromosome, self.start, 62.173 + self.end, ori) 62.174 + 62.175 +class IntervalGroup(list): 62.176 + """ list of intervals """ 62.177 + 62.178 + def __init__(self): 62.179 + self.start = 0 62.180 + 62.181 + def calc_start(self): 62.182 + """ shift all grou coordinates to make min position zero """ 62.183 + offset = -min(min(interval.group_real_start, interval.group_real_end) 62.184 + for interval in self) 62.185 + for interval in self: 62.186 + interval.group_real_start += offset 62.187 + 62.188 +class RepeatJoiner(dict): 62.189 + """ dictionary {<chromosome name>: chromosome} 62.190 + 62.191 + Data: 62.192 + interval_groups -- list of interval_groups 62.193 + _intervals = [] 62.194 + 62.195 + >>> rj = RepeatJoiner() 62.196 + >>> r1 = Interval(rj, '1', 10, 100) 62.197 + >>> r2 = Interval(rj, '1', 50, 150) 62.198 + >>> Interval.pair(r1, r2) 62.199 + >>> rj.keys() 62.200 + ['1'] 62.201 + >>> rj.build_groups() 62.202 + >>> len(set(rj.interval_groups[0])) 62.203 + 2 62.204 + >>> print rj.interval_groups 62.205 + [[1: [10(0), 100(90)), 1: [50(0), 150(100))]] 62.206 + 62.207 + >>> rj = RepeatJoiner() 62.208 + >>> Interval.pair(Interval(rj, '1', 10, 100), Interval(rj, '1', 110, 200)) 62.209 + >>> Interval.pair(Interval(rj, '1', 30, 120), Interval(rj, '1', 7110, 7200)) 62.210 + >>> rj.build_groups() 62.211 + >>> print rj.interval_groups 62.212 + [[1: [10(0), 100(90)), 1: [110(0), 200(90)), 1: [30(20), 120(110)), 1: [7110(20), 7200(110))]] 62.213 + 62.214 + >>> rj = RepeatJoiner() 62.215 + >>> Interval.pair(Interval(rj, '1', 10, 100), Interval(rj, '1', 110, 200)) 62.216 + >>> Interval.pair(Interval(rj, '1', 6030, 6120), 
Interval(rj, '1', 7110, 7200)) 62.217 + >>> rj.build_groups() 62.218 + >>> print rj.interval_groups 62.219 + [[1: [10(0), 100(90)), 1: [110(0), 200(90))], [1: [6030(0), 6120(90)), 1: [7110(0), 7200(90))]] 62.220 + 62.221 + >>> rj = RepeatJoiner() 62.222 + >>> Interval.pair(Interval(rj, '1', 10, 50), Interval(rj, '1', 110, 150)) 62.223 + >>> Interval.pair(Interval(rj, '1', 140, 180), Interval(rj, '1', 1000, 1040)) 62.224 + >>> rj.build_groups() 62.225 + >>> print rj.interval_groups 62.226 + [[1: [10(0), 50(40)), 1: [110(0), 150(40))], [1: [140(0), 180(40)), 1: [1000(0), 1040(40))]] 62.227 + 62.228 + >>> rj = RepeatJoiner() 62.229 + >>> Interval.pair(Interval(rj, '1', 10, 100), Interval(rj, '1', 110, 200)) 62.230 + >>> Interval.pair(Interval(rj, '1', 30, 120, ori=False), Interval(rj, '1', 7110, 7200, ori=False)) 62.231 + >>> rj.build_groups() 62.232 + >>> print rj.interval_groups 62.233 + [[1: [10(0), 100(90)), 1: [110(0), 200(90)), 1: [119(109), 29(19)), 1: [7199(109), 7109(19))]] 62.234 + 62.235 + """ 62.236 + def __init__(self): 62.237 + self.interval_groups = [] 62.238 + self._intervals = [] 62.239 + 62.240 + def build_groups(self): 62.241 + """ build interval groups """ 62.242 + used = set() 62.243 + for interval in self._intervals: 62.244 + if interval not in used: 62.245 + interval.group_ori = True 62.246 + interval.group_real_start = interval.real_start 62.247 + interval_group = IntervalGroup() 62.248 + self.interval_groups.append(interval_group) 62.249 + q = Queue.Queue() 62.250 + q.put(interval) 62.251 + while not q.empty(): 62.252 + r = q.get() 62.253 + if r not in used: 62.254 + used.add(r) 62.255 + interval_group.append(r) 62.256 + for r1 in r.edges(): 62.257 + if r1 not in used: 62.258 + q.put(r1) 62.259 + r1.inherit_from(r) 62.260 + interval_group.calc_start() 62.261 + 62.262 + 62.263 +if __name__ == '__main__': 62.264 + import doctest 62.265 + doctest.testmod()
63.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 63.2 +++ b/repeats/repeats.py Tue Jan 25 16:03:00 2011 +0300 63.3 @@ -0,0 +1,63 @@ 63.4 +""" 63.5 +Repeats joining tool 63.6 + 63.7 +Rereats input format: 63.8 + input_id pos_from pos_to 63.9 +""" 63.10 + 63.11 +from allpy import config, alignment, block, sequence 63.12 +Sequence = sequence.Sequence 63.13 +Block = block.Block 63.14 +Alignment = alignment.Alignment 63.15 +import argparse 63.16 +import os 63.17 +from tempfile import NamedTemporaryFile 63.18 + 63.19 +r = argparse.FileType('r') 63.20 +w = argparse.FileType('w') 63.21 + 63.22 +p = argparse.ArgumentParser( 63.23 +description='Repeats joining tool', 63.24 +formatter_class=argparse.ArgumentDefaultsHelpFormatter, 63.25 +#~ argument_default=argparse.SUPPRESS, 63.26 +) 63.27 + 63.28 +p.add_argument('-v','--version',action='version',version='%(prog)s 1.0') 63.29 +p.add_argument('-i',help='Input fasta file with genome',metavar='FILE',type=r,required=True) 63.30 +p.add_argument('-I',help='Input text file with repeats',metavar='FILE',type=r,required=True) 63.31 +p.add_argument('-n',help='Fasta identifier',metavar='name',required=True) 63.32 +p.add_argument('-r',help='Repeat identifier',metavar='name',type=int,required=True) 63.33 +p.add_argument('-o',help='Output alignment file',metavar='FILE',type=w,required=True) 63.34 + 63.35 +tmp_file = None 63.36 + 63.37 +try: 63.38 + args = p.parse_args() 63.39 + repeat_copies_pos = [] 63.40 + for line in p.i: 63.41 + line = line.strip() 63.42 + if not line: 63.43 + continue 63.44 + try: 63.45 + repeat_type, pos_from, pos_to = line.strip() 63.46 + pos_from = int(pos_from) 63.47 + pos_to = int(pos_to) 63.48 + except: 63.49 + print "Warning: wrong input line '%s'" % line 63.50 + if repeat_type == p.r: 63.51 + repeat_copies_pos.append((pos_from, pos_to)) 63.52 + repeat_copies = [] 63.53 + for pos_from, pos_to in repeat_copies_pos: 63.54 + seq = Sequence.file_slice(p.I, pos_from, pos_to, p.r 63.55 + repeat_copies.append(seq) 
63.56 + alignment = Alignment.from_sequences(*repeat_copies) 63.57 + alignment.muscle_align() 63.58 + alignment.save_fasta(p.o) 63.59 + 63.60 +except Exception, t: 63.61 + print t 63.62 + exit() 63.63 + 63.64 +if tmp_file: 63.65 + os.unlink(tmp_file) 63.66 +
64.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 64.2 +++ b/repeats/test.py Tue Jan 25 16:03:00 2011 +0300 64.3 @@ -0,0 +1,38 @@ 64.4 + 64.5 +import sys 64.6 +import pprint 64.7 + 64.8 +from repeat_joiner import Interval, RepeatJoiner 64.9 + 64.10 +rj = RepeatJoiner() 64.11 +for line in open(sys.argv[1]): 64.12 + line = line.strip() 64.13 + if line: 64.14 + c1, c2, from1, to1, from2, to2, ori1, ori2 = line.split()[:8] 64.15 + if c1 == 'DNA_1': 64.16 + continue # first line 64.17 + ori1 = True if int(ori1) == 1 else False 64.18 + ori2 = True if int(ori2) == 1 else False 64.19 + from1 = int(from1) 64.20 + to1 = int(to1) + 1 64.21 + from2 = int(from2) 64.22 + to2 = int(to2) + 1 64.23 + 64.24 + r1 = Interval(rj, c1, from1, to1, ori1) 64.25 + r2 = Interval(rj, c2, from2, to2, ori2) 64.26 + Interval.pair(r1, r2) 64.27 + 64.28 +rj.build_groups() 64.29 +rj.interval_groups.sort(key=lambda g: len(g), reverse=True) 64.30 + 64.31 + 64.32 +print "group\tchr\tchr_from\tchr_to\tgroup_from\tgroup_to\tori\tgroup_ori" 64.33 +for i, interval_group in enumerate(rj.interval_groups): 64.34 + interval_group.sort(key=lambda i: i.group_start) 64.35 + prev = set() 64.36 + for interval in interval_group: 64.37 + if interval.tuple() in prev: 64.38 + continue 64.39 + prev.add(interval.tuple()) 64.40 + print "%i\t%s" % (i, str(interval).replace(' ', '\t')) 64.41 +
65.1 --- a/sandbox/bufferedcanvas.py Mon Jan 24 21:40:10 2011 +0300 65.2 +++ b/sandbox/bufferedcanvas.py Tue Jan 25 16:03:00 2011 +0300 65.3 @@ -23,9 +23,9 @@ 65.4 published by the Free Software Foundation; either version 2.1 of the 65.5 License, or (at your option) any later version. 65.6 65.7 -As a special exception, the copyright holders of this library 65.8 +As a special exception, the copyright holders of this library 65.9 hereby recind Section 3 of the GNU Lesser General Public License. This 65.10 -means that you MAY NOT apply the terms of the ordinary GNU General 65.11 +means that you MAY NOT apply the terms of the ordinary GNU General 65.12 Public License instead of this License to any given copy of the 65.13 Library. This has been done to prevent users of the Library from being 65.14 denied access or the ability to use future improvements.
66.1 --- a/sandbox/gtk-text.py Mon Jan 24 21:40:10 2011 +0300 66.2 +++ b/sandbox/gtk-text.py Tue Jan 25 16:03:00 2011 +0300 66.3 @@ -14,7 +14,7 @@ 66.4 66.5 def __init__(self): 66.6 window = gtk.Window(gtk.WINDOW_TOPLEVEL) 66.7 - window.set_resizable(True) 66.8 + window.set_resizable(True) 66.9 window.connect("destroy", self.close_application) 66.10 66.11 sw = gtk.ScrolledWindow() 66.12 @@ -95,7 +95,7 @@ 66.13 66.14 def main(): 66.15 gtk.main() 66.16 - return 0 66.17 + return 0 66.18 66.19 if __name__ == "__main__": 66.20 TextViewExample()
67.1 --- a/sandbox/tk-text.py Mon Jan 24 21:40:10 2011 +0300 67.2 +++ b/sandbox/tk-text.py Tue Jan 25 16:03:00 2011 +0300 67.3 @@ -26,7 +26,7 @@ 67.4 # for i in xrange(len(body)): 67.5 # t.insert('end', body[i], 'c%d' % ids[i]) 67.6 # t.insert('end', '\n') 67.7 -# 67.8 +# 67.9 # for i in xrange(11): 67.10 # c = i * 255 // 10 67.11 # t.tag_configure('c%d' % i, background='#%02x%02x%02x' % (c,c,c)) 67.12 @@ -39,10 +39,10 @@ 67.13 # t.insert('end', body[i], 'l%dc%d' % (y, ids[i])) 67.14 # t.insert('end', '\n') 67.15 # root.update() 67.16 -# 67.17 +# 67.18 # print "text created" 67.19 # root.update() 67.20 -# 67.21 +# 67.22 # for y in xrange(len(seqs)): 67.23 # for i in xrange(11): 67.24 # c = i * 255 // 10
68.1 --- a/sandbox/ttk.py Mon Jan 24 21:40:10 2011 +0300 68.2 +++ b/sandbox/ttk.py Tue Jan 25 16:03:00 2011 +0300 68.3 @@ -78,7 +78,7 @@ 68.4 global seqs 68.5 names.delete(0, 'end') 68.6 sequences.delete('1.0', 'end') 68.7 - 68.8 + 68.9 filename = filedialog.askopenfilename() 68.10 seqs = set() 68.11 for item in open(filename).read().split('\n>'):
69.1 --- a/sandbox/wx-textctrl.py Mon Jan 24 21:40:10 2011 +0300 69.2 +++ b/sandbox/wx-textctrl.py Tue Jan 25 16:03:00 2011 +0300 69.3 @@ -31,7 +31,7 @@ 69.4 # for i in xrange(11): 69.5 # c = i * 255 // 10 69.6 # attrs.append(wx.TextAttr('black', '#%02x%02x%02x' % (c,c,c))) 69.7 -# 69.8 +# 69.9 # for name, body, ids, colors in seqs: 69.10 # for x in xrange(len(body)): 69.11 # text.SetDefaultStyle(attrs[ids[x]]) 69.12 @@ -40,7 +40,7 @@ 69.13 69.14 # ## v4: output text, then colorize 69.15 # text.WriteText("\n".join(body for name, body, ids, colors in seqs)) 69.16 -# 69.17 +# 69.18 # i = 0 69.19 # text.Freeze() 69.20 # for name, body, ids, colors in seqs:
70.1 --- a/sec_str/sec_str.py Mon Jan 24 21:40:10 2011 +0300 70.2 +++ b/sec_str/sec_str.py Tue Jan 25 16:03:00 2011 +0300 70.3 @@ -4,12 +4,12 @@ 70.4 70.5 """ 70.6 70.7 -from allpy.lib import config, project, block 70.8 +from allpy import config, alignment, block 70.9 Block = block.Block 70.10 -Project = project.Project 70.11 +Alignment = alignment.Alignment 70.12 import argparse 70.13 import sys 70.14 -from allpy.lib.fasta import determine_long_line 70.15 +from allpy.fasta import determine_long_line 70.16 70.17 r = argparse.FileType('r') 70.18 w = argparse.FileType('w') 70.19 @@ -17,7 +17,7 @@ 70.20 70.21 p = argparse.ArgumentParser( 70.22 description='Secondary structure mapping tool.', 70.23 -formatter_class=argparse.ArgumentDefaultsHelpFormatter, 70.24 +formatter_class=argparse.ArgumentDefaultsHelpFormatter, 70.25 #~ argument_default=argparse.SUPPRESS, 70.26 ) 70.27 70.28 @@ -30,7 +30,7 @@ 70.29 tmp_file = None 70.30 70.31 try: 70.32 - project = Project(args.i) 70.33 + alignment = Alignment(args.i) 70.34 except: 70.35 args.i.close() 70.36 tmp_file = NamedTemporaryFile(delete=False) 70.37 @@ -38,12 +38,11 @@ 70.38 os.system('seqret %(msf)s %(fasta)s' % \ 70.39 {'msf': args.i.name, 'fasta': tmp_file.name}) 70.40 args.i = open(tmp_file.name) 70.41 - project = Project(args.i) 70.42 + alignment = Alignment(args.i) 70.43 args.i.seek(0) 70.44 long_line = determine_long_line(args.i) 70.45 70.46 -#~ project.pdb_auto_add() 70.47 -block = Block(project) 70.48 +block = Block(alignment) 70.49 70.50 args.i.seek(0) 70.51 f = args.f 70.52 @@ -53,7 +52,7 @@ 70.53 sequence.pdb_auto_add() 70.54 chain = sequence.pdb_chains[0] 70.55 sequence.pdb_add_sec_str(chain) 70.56 - project.save_secstr(f, sequence, chain, "%s_ss" % sequence.name, long_line=long_line) 70.57 + alignment.save_secstr(f, sequence, chain, "%s_ss" % sequence.name, long_line=long_line) 70.58 sequence.pdb_unload() 70.59 f.close() 70.60
71.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 71.2 +++ b/setup.py Tue Jan 25 16:03:00 2011 +0300 71.3 @@ -0,0 +1,8 @@ 71.4 +from distutils.core import setup 71.5 +setup( 71.6 + name='allpy', 71.7 + version='0.1', 71.8 + packages=['allpy'], 71.9 +) 71.10 + 71.11 +# vim: set noet:
72.1 --- a/test/test.py Mon Jan 24 21:40:10 2011 +0300 72.2 +++ b/test/test.py Tue Jan 25 16:03:00 2011 +0300 72.3 @@ -1,9 +1,9 @@ 72.4 import sys 72.5 72.6 -from allpy.lib.project import Project 72.7 -from allpy.lib.block import Block 72.8 +from allpy.alignment import Alignment 72.9 +from allpy.block import Block 72.10 72.11 -p = Project(open('test.fasta')) 72.12 +p = Alignment(open('test.fasta')) 72.13 print "alignment length: %i" % len(p) 72.14 print "sequence: %s" % str(p.sequences[0]) 72.15
73.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 73.2 +++ b/test/usecase1.py Tue Jan 25 16:03:00 2011 +0300 73.3 @@ -0,0 +1,35 @@ 73.4 +from allpy import protein 73.5 + 73.6 +# Create sequences from string representation of sequence body 73.7 +sequence_1 = protein.Sequence.from_string("mkstf", name="E2E4") 73.8 +sequence_2 = protein.Sequence.from_string("mstkfff", description="Longer sequence") 73.9 + 73.10 +# Create alignment from sequences 73.11 +alignment = protein.Alignment() 73.12 +alignment.append_sequence(sequence_1) 73.13 +alignment.append_sequence(sequence_2) 73.14 +alignment.realign("muscle") 73.15 + 73.16 +# For each sequence, print number of gaps and non-gaps in alignment 73.17 +for row in alignment.rows(): 73.18 + gaps = 0 73.19 + monomers = 0 73.20 + for column in alignment.columns: 73.21 + if column in row: 73.22 + monomers += 1 73.23 + else: 73.24 + gaps += 1 73.25 + print "%s: %s gaps, %s non-gaps" % (row.sequence.name, gaps, monomers) 73.26 + 73.27 +# Print number of gaps in each column 73.28 +gaps = [] 73.29 +for column in alignment.columns: 73.30 + column_gaps = 0 73.31 + for sequence in alignment.sequences: 73.32 + if sequence not in column: 73.33 + column_gaps += 1 73.34 + gaps.append(column_gaps) 73.35 +print " ".join(map(str, gaps)) 73.36 + 73.37 +# Write alignment to file 73.38 +alignment.to_fasta(open("new_file.fasta", "w"))
# test/usecase2.py
"""Use case 2: find long, highly identical runs in a pairwise DNA alignment.

Fragments are in pair_repeat.fasta
"""
# Length of the sliding window (in alignment columns).
width = 15
# Minimal number of identical columns a window must contain.
threshold = 14
# Backward-compatible alias for the original, misspelled name.
treshold = threshold


def my_column_mark(column, threshold):
    """Helper to mark column (given as dict) by identity.

    Returns "+" when some one-letter code occurs more than `threshold`
    times in the column and "-" otherwise.
    """
    # Iterating a dict yields only its keys, so ask for the
    # (sequence, monomer) pairs explicitly.  A plain iterable of pairs is
    # still accepted as is.
    pairs = column.items() if hasattr(column, "items") else column
    count = {}
    for sequence, monomer in pairs:
        code = monomer.code1
        count[code] = count.get(code, 0) + 1
    if any(number > threshold for number in count.values()):
        return "+"
    return "-"


def my_pair_mark(column):
    """Helper to mark column of 2 sequences (given as list) by identity.

    Returns "+" when both monomers are present and have the same
    one-letter code, "-" when they differ or either one is None.
    """
    first, second = column[0], column[1]
    if first is None or second is None:
        return "-"
    # Compare the two *different* monomers (the original compared
    # column[0] with itself, which is always true).
    if first.code1 == second.code1:
        return "+"
    return "-"


def find_runs(markup, window=None, required=None):
    """Find long positive runs.

    Slides a window of `window` marks along `markup` and selects every
    window that contains at least `required` "+" marks.  Selected windows
    that end at consecutive positions are merged into one block.

    This function has nothing to do with allpy; it is only here to make
    the example complete.

    Arguments:
    markup -- indexable sequence of "+" / "-" marks (string or list)
    window -- window length; defaults to the module-level `width`
    required -- minimal number of "+" per window; defaults to the
                module-level `threshold`

    Returns a list of (start, stop) tuples; positions count from 1 and
    both ends are inclusive.

    Raises Exception if no window qualifies.
    """
    if window is None:
        window = width
    if required is None:
        required = threshold

    count = 0
    plus_positions = []
    for i, mark in enumerate(markup):
        position = i + 1
        # The mark entering the window is counted at every position ...
        if mark == "+":
            count += 1
        # ... and the mark that has just left the window is discounted.
        if position > window and markup[i - window] == "+":
            count -= 1
        # Only full windows are evaluated.
        if position >= window and count >= required:
            plus_positions.append(position)
    if not plus_positions:
        raise Exception("No blocks in alignment")

    blocks = []
    start = plus_positions[0] - window + 1
    stop = plus_positions[0]
    for p in plus_positions[1:]:
        if p == stop + 1:
            stop = p
            continue
        blocks.append((start, stop))
        start = p - window + 1
        stop = p
    # The last block is still open when the loop ends.
    blocks.append((start, stop))
    return blocks


def main():
    """Align the two sequences of pair_repeat.fasta and print the runs."""
    # Imported here so that the pure helpers above can be imported (and
    # tested) without allpy being installed.
    from allpy import dna

    with open("pair_repeat.fasta") as fasta:
        alignment = dna.Alignment.from_file(fasta)
    if len(alignment.sequences) != 2:
        raise Exception("Input must have exactly 2 sequences!")
    alignment.realign("needle", gap_open=0)
    # NOTE(review): two ways of building the markup are shown, presumably
    # equivalent; the second result replaces the first.
    markup = [my_pair_mark(column) for column in alignment.columns_as_lists()]
    markup = alignment.map_columns(my_pair_mark)
    print(find_runs(markup))


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print("An error has occured: %s" % (e,))
# test/usecase3.py
"""Use case 3: flush left the non-conservative parts of a protein alignment.

Reads aln.fasta, marks the columns listed in `conservative` with "+" and
all other columns with "-", groups consecutive equally marked columns into
blocks, calls flush_left() on every "-" block and writes the alignment to
output.fasta.
"""
from allpy import protein

# Close the input file as soon as the alignment has been read.
with open("aln.fasta") as aln_file:
    alignment = protein.Alignment.from_file(aln_file)
# Alternative example: conservative = [(10,20), (40,50)]
conservative = [(0,6),(18,37)]

def ranges_to_markup(ranges):
    """Convert list of ranges to line of markup.

    Each range is a (begin, end) pair of column indexes; indexes count
    from 0 and both ends are inclusive.  Returns a string with one
    character per alignment column: "+" inside any range, "-" elsewhere.

    Raises IndexError if a range reaches beyond the last column.

    This has nothing to do with allpy.
    """
    markup = ["-"] * len(alignment.columns)
    for begin, end in ranges:
        for i in range(begin, end + 1):
            markup[i] = "+"
    return "".join(markup)

def markup_to_blocks(markup):
    """Convert markup line to a bunch of blocks, one for each sequential run.

    Returns a dict that maps each mark to the list of blocks built from
    the runs of columns carrying that mark, in alignment order.
    """
    current = None
    blocks = {}
    for mark, column in zip(markup, alignment.columns):
        if mark != current:
            # The mark changed: start a new, empty block for the new run.
            block = protein.Block.from_alignment(alignment, columns=[])
            blocks.setdefault(mark, []).append(block)
            current = mark
        blocks[mark][-1].columns.append(column)
    return blocks

def main():
    """Flush left every non-conservative block and save the alignment."""
    markup = ranges_to_markup(conservative)
    blocks = markup_to_blocks(markup)
    # There is no "-" key at all when every column is conservative.
    for block in blocks.get("-", []):
        block.flush_left()
    with open("output.fasta", "w") as output:
        alignment.to_fasta(output)

main()
#!/usr/bin/python
# utils/flush_left.py
"""Flush all monomers in given range to the left, all gaps to the right.

All position indexes are counting from 1.
"""
import optparse
import os
import subprocess
import sys

from allpy import protein


def _convert_to_msf(fasta_path):
    """Convert `fasta_path` to MSF with seqret, then delete the FASTA file.

    The MSF file is named like the FASTA file with its extension replaced
    by ".msf".  The FASTA file is only removed after seqret succeeded, so
    a failed conversion never destroys the result.
    """
    # splitext only strips the real extension, so paths such as
    # "./out.fasta" or "run.1/out.fasta" keep their directory part.
    msf_path = os.path.splitext(fasta_path)[0] + ".msf"
    try:
        # Argument list, no shell: file names with spaces or shell
        # metacharacters are passed through untouched.
        status = subprocess.call(["seqret", fasta_path, "msf::" + msf_path])
    except OSError as error:
        sys.exit("Cannot run seqret: %s" % error)
    if status != 0:
        sys.exit("seqret failed with status %s; FASTA output kept in %s"
                 % (status, fasta_path))
    os.remove(fasta_path)


def main():
    """Flush left the columns options.begin..options.end of the alignment.

    Reads the module-level `options` set up by the option parser below.
    A missing begin/end defaults to the first/last alignment column.
    """
    with open(options.in_file) as in_file:
        alignment = protein.Alignment.from_file(in_file)
    if not options.begin:
        options.begin = 1
    if not options.end:
        options.end = len(alignment.columns)
    # Positions count from 1 and the end position is inclusive.
    columns = alignment.columns[options.begin-1:options.end]
    block = protein.Block.from_alignment(alignment, columns=columns)
    block.flush_left()
    # Close (and thereby flush) the output before seqret reads it.
    with open(options.out_file, "w") as out_file:
        alignment.to_fasta(out_file)
    if options.msf:
        _convert_to_msf(options.out_file)


if __name__ == "__main__":
    usage = "Usage: %s [options]\n\n%s" % (sys.argv[0], __doc__.strip())
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-i", "--in-file",
        help="Input alignment file (in FASTA format)")
    parser.add_option("-o", "--out-file",
        help="Output file")
    parser.add_option("-b", "--begin", type=int,
        help="Position in alignment to start from")
    parser.add_option("-e", "--end", type=int,
        help="Position in alignment to end with")
    parser.add_option("-m", "--msf", action='store_true',
        help="Output in MSF format (FASTA by default)")

    options, args = parser.parse_args()

    if args:
        parser.error("We take no positional arguments.")
    if not options.in_file or not options.out_file:
        parser.error("Both -i and -o parameters must be given.")

    main()

# vim: set et ts=4 sts=4 sw=4:
#!/usr/bin/python
# utils/flush_left_vblock.py
"""Flush all monomers in given range to the left, all gaps to the right.

All position indexes are counting from 1.
"""
import optparse
import os
import subprocess
import sys

from allpy import protein


def _convert_to_msf(fasta_path):
    """Convert `fasta_path` to MSF with seqret, then delete the FASTA file.

    The MSF file is named like the FASTA file with its extension replaced
    by ".msf".  The FASTA file is only removed after seqret succeeded, so
    a failed conversion never destroys the result.
    """
    # splitext only strips the real extension, so paths such as
    # "./out.fasta" or "run.1/out.fasta" keep their directory part.
    msf_path = os.path.splitext(fasta_path)[0] + ".msf"
    try:
        # Argument list, no shell: file names with spaces or shell
        # metacharacters are passed through untouched.
        status = subprocess.call(["seqret", fasta_path, "msf::" + msf_path])
    except OSError as error:
        sys.exit("Cannot run seqret: %s" % error)
    if status != 0:
        sys.exit("seqret failed with status %s; FASTA output kept in %s"
                 % (status, fasta_path))
    os.remove(fasta_path)


def main():
    """Flush left the columns options.begin..options.end of the alignment.

    Reads the module-level `options` set up by the option parser below.
    A missing begin/end defaults to the first/last alignment column.
    """
    with open(options.in_file) as in_file:
        alignment = protein.Alignment.from_file(in_file)
    if not options.begin:
        options.begin = 1
    if not options.end:
        options.end = len(alignment.columns)
    # Positions count from 1 and the end position is inclusive.
    columns = alignment.columns[options.begin-1:options.end]
    block = protein.Block.from_alignment(alignment, columns=columns)
    block.flush_left()
    # Close (and thereby flush) the output before seqret reads it.
    with open(options.out_file, "w") as out_file:
        alignment.to_fasta(out_file)
    if options.msf:
        _convert_to_msf(options.out_file)


if __name__ == "__main__":
    usage = "Usage: %s [options]\n\n%s" % (sys.argv[0], __doc__.strip())
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-i", "--in-file",
        help="Input alignment file (in FASTA format)")
    parser.add_option("-o", "--out-file",
        help="Output file")
    parser.add_option("-b", "--begin", type=int,
        help="Position in alignment to start from")
    parser.add_option("-e", "--end", type=int,
        help="Position in alignment to end with")
    parser.add_option("-m", "--msf", action='store_true',
        help="Output in MSF format (FASTA by default)")

    options, args = parser.parse_args()

    if args:
        parser.error("We take no positional arguments.")
    if not options.in_file or not options.out_file:
        parser.error("Both -i and -o parameters must be given.")

    main()

# vim: set et ts=4 sts=4 sw=4:
#!/usr/bin/python
# utils/flush_left_vblocks.py
"""Flush all monomers in given ranges to the left, all gaps to the right.

All position indexes are counting from 1.

File with ranges format is as follows
#from to
10 15
107 121
etc

Lines with "#" in 1st position are skipped

"""
import optparse
import os
import subprocess
import sys

from allpy import protein


def _convert_to_msf(fasta_path):
    """Convert `fasta_path` to MSF with seqret, then delete the FASTA file.

    The MSF file is named like the FASTA file with its extension replaced
    by ".msf".  The FASTA file is only removed after seqret succeeded, so
    a failed conversion never destroys the result.
    """
    # splitext only strips the real extension, so paths such as
    # "./out.fasta" or "run.1/out.fasta" keep their directory part.
    msf_path = os.path.splitext(fasta_path)[0] + ".msf"
    try:
        # Argument list, no shell: file names with spaces or shell
        # metacharacters are passed through untouched.
        status = subprocess.call(["seqret", fasta_path, "msf::" + msf_path])
    except OSError as error:
        sys.exit("Cannot run seqret: %s" % error)
    if status != 0:
        sys.exit("seqret failed with status %s; FASTA output kept in %s"
                 % (status, fasta_path))
    os.remove(fasta_path)


def main(ranges):
    """Flush left every (begin, end) column range of the input alignment.

    `ranges` is a list of (begin, end) pairs; positions count from 1 and
    the end position is inclusive.  File names and the MSF switch are
    read from the module-level `options`.
    """
    with open(options.in_file) as in_file:
        alignment = protein.Alignment.from_file(in_file)
    for begin, end in ranges:
        columns = alignment.columns[begin-1:end]
        block = protein.Block.from_alignment(alignment, columns=columns)
        block.flush_left()

    # Close (and thereby flush) the output before seqret reads it.
    with open(options.out_file, "w") as out_file:
        alignment.to_fasta(out_file)
    if options.msf:
        _convert_to_msf(options.out_file)


def ranges():
    """Read the ranges file named by options.ranges.

    Blank lines and lines with "#" in 1st position are skipped.  Lines
    that are not exactly two integers are reported with a warning and
    ignored.  Returns a list of (begin, end) tuples of ints.
    """
    result = []
    with open(options.ranges) as ranges_file:
        for line_no, line in enumerate(ranges_file, 1):
            if line.strip() == "":
                continue
            if line[0] == "#":
                continue
            try:
                begin, end = line.strip().split()
                begin = int(begin)
                end = int(end)
            except ValueError:
                # Raised both for a wrong number of fields and for
                # fields that are not integers.
                print("Warning: wrong format in line %s, ignoring" % line_no)
                continue
            result.append((begin, end))
    return result


if __name__ == "__main__":
    usage = "Usage: %s [options]\n\n%s" % (sys.argv[0], __doc__.strip())
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-i", "--in-file",
        help="Input alignment file (in FASTA format)")
    parser.add_option("-o", "--out-file",
        help="Output file")
    parser.add_option("-r", "--ranges",
        help="Input file with ranges")
    parser.add_option("-m", "--msf", action='store_true',
        help="Output in MSF format (FASTA by default)")

    options, args = parser.parse_args()

    if args:
        parser.error("We take no positional arguments.")
    if not options.in_file or not options.out_file:
        parser.error("Both -i and -o parameters must be given.")
    # Without this check a missing -r ends in an obscure open(None) error.
    if not options.ranges:
        parser.error("Parameter -r must be given.")

    main(ranges())

# vim: set et ts=4 sts=4 sw=4: